Commit 0910e767ad5609c2efcf65ddc80a5b7bede434d0

Authored by Jay Berkenbilt
1 parent 8c718b7e

QPDFJob increment: basic QPDFJob structure

Move most of the methods called from qpdf.cc after argument parsing
into QPDFJob. In this increment, enough QPDFJob API has been added to
handle the branch of QPDFJob::run() that creates output with an
appropriate division between qpdf.cc and QPDFJob.

There are temporary bits of code to enable everything to compile and
pass the test suite, including some duplication and hard-coded values.
cSpell.json
... ... @@ -333,6 +333,7 @@
333 333 "qpdffake",
334 334 "qpdffilespecobjecthelper",
335 335 "qpdfformfieldobjecthelper",
  336 + "qpdfjob",
336 337 "qpdfmatrix",
337 338 "qpdfnametreeobjecthelper",
338 339 "qpdfnumbertreeobjecthelper",
... ...
include/qpdf/Constants.h
... ... @@ -176,4 +176,11 @@ enum pdf_annotation_flag_e
176 176 an_locked_contents = 1 << 9
177 177 };
178 178  
/* Encryption/password status for QPDFJob. Each enumerator occupies
 * its own bit so that the values can be combined with bitwise OR. */
enum qpdf_encryption_status_e
{
    qpdf_es_encrypted = 0x01,          /* bit 0 */
    qpdf_es_password_incorrect = 0x02  /* bit 1 */
};
  185 +
179 186 #endif /* QPDFCONSTANTS_H */
... ...
include/qpdf/QPDFJob.hh 0 → 100644
  1 +// Copyright (c) 2005-2021 Jay Berkenbilt
  2 +//
  3 +// This file is part of qpdf.
  4 +//
  5 +// Licensed under the Apache License, Version 2.0 (the "License");
  6 +// you may not use this file except in compliance with the License.
  7 +// You may obtain a copy of the License at
  8 +//
  9 +// http://www.apache.org/licenses/LICENSE-2.0
  10 +//
  11 +// Unless required by applicable law or agreed to in writing, software
  12 +// distributed under the License is distributed on an "AS IS" BASIS,
  13 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 +// See the License for the specific language governing permissions and
  15 +// limitations under the License.
  16 +//
  17 +// Versions of qpdf prior to version 7 were released under the terms
  18 +// of version 2.0 of the Artistic License. At your option, you may
  19 +// continue to consider qpdf to be licensed under those terms. Please
  20 +// see the manual for additional information.
  21 +
  22 +#ifndef QPDFJOB_HH
  23 +#define QPDFJOB_HH
  24 +
  25 +#include <qpdf/DLL.h>
  26 +#include <qpdf/PointerHolder.hh>
  27 +#include <qpdf/QPDF.hh>
  28 +#include <qpdf/Constants.h>
  29 +
  30 +#include <string>
  31 +#include <list>
  32 +#include <vector>
  33 +#include <set>
  34 +#include <map>
  35 +#include <iostream>
  36 +
  37 +class QPDFWriter;
  38 +
  39 +class QPDFJob
  40 +{
  41 + public:
  42 + QPDF_DLL
  43 + QPDFJob();
  44 +
  45 + QPDF_DLL
  46 + void setOutputStreams(std::ostream* out_stream, std::ostream* err_stream);
  47 +
  48 + QPDF_DLL
  49 + void run();
  50 +
  51 + QPDF_DLL
  52 + bool hasWarnings();
  53 +
  54 + QPDF_DLL
  55 + bool createsOutput();
  56 +
  57 + QPDF_DLL
  58 + bool suppressWarnings();
  59 +
  60 + QPDF_DLL
  61 + bool checkRequiresPassword();
  62 +
  63 + QPDF_DLL
  64 + bool checkIsEncrypted();
  65 +
  66 +
  67 + // Return value is bitwise OR of values from qpdf_encryption_status_e
  68 + QPDF_DLL
  69 + unsigned long getEncryptionStatus();
  70 +
  71 + // QXXXQ From here to END-PUBLIC should all be private
  72 + public:
  73 +
  74 + QPDF_DLL
  75 + static JSON json_schema(std::set<std::string>* keys = 0);
  76 + QPDF_DLL
  77 + static void parse_object_id(
  78 + std::string const& objspec, bool& trailer, int& obj, int& gen);
  79 +
  80 + struct PageSpec
  81 + {
  82 + PageSpec(std::string const& filename,
  83 + char const* password,
  84 + char const* range) :
  85 + filename(filename),
  86 + password(password),
  87 + range(range)
  88 + {
  89 + }
  90 +
  91 + std::string filename;
  92 + char const* password;
  93 + char const* range;
  94 + };
  95 +
  96 + struct RotationSpec
  97 + {
  98 + RotationSpec(int angle = 0, bool relative = false) :
  99 + angle(angle),
  100 + relative(relative)
  101 + {
  102 + }
  103 +
  104 + int angle;
  105 + bool relative;
  106 + };
  107 +
  108 + enum password_mode_e { pm_bytes, pm_hex_bytes, pm_unicode, pm_auto };
  109 +
  110 + struct UnderOverlay
  111 + {
  112 + UnderOverlay(char const* which) :
  113 + which(which),
  114 + filename(0),
  115 + password(0),
  116 + to_nr("1-z"),
  117 + from_nr("1-z"),
  118 + repeat_nr("")
  119 + {
  120 + }
  121 +
  122 + std::string which;
  123 + char const* filename;
  124 + char const* password;
  125 + char const* to_nr;
  126 + char const* from_nr;
  127 + char const* repeat_nr;
  128 + PointerHolder<QPDF> pdf;
  129 + std::vector<int> to_pagenos;
  130 + std::vector<int> from_pagenos;
  131 + std::vector<int> repeat_pagenos;
  132 + };
  133 +
  134 + struct AddAttachment
  135 + {
  136 + AddAttachment() :
  137 + replace(false)
  138 + {
  139 + }
  140 +
  141 + std::string path;
  142 + std::string key;
  143 + std::string filename;
  144 + std::string creationdate;
  145 + std::string moddate;
  146 + std::string mimetype;
  147 + std::string description;
  148 + bool replace;
  149 + };
  150 +
  151 + struct CopyAttachmentFrom
  152 + {
  153 + std::string path;
  154 + std::string password;
  155 + std::string prefix;
  156 + };
  157 +
  158 + PointerHolder<QPDF> processFile(
  159 + char const* filename, char const* password);
  160 + void validateUnderOverlay(QPDF& pdf, QPDFJob::UnderOverlay* uo);
  161 + void handleUnderOverlay(QPDF& pdf);
  162 + void copyAttachments(QPDF& pdf);
  163 + void handleTransformations(QPDF& pdf);
  164 + void addAttachments(QPDF& pdf);
  165 + void setWriterOptions(QPDF& pdf, QPDFWriter& w);
  166 + void doSplitPages(QPDF& pdf, bool& warnings);
  167 + void writeOutfile(QPDF& pdf);
  168 +
  169 + enum remove_unref_e { re_auto, re_yes, re_no };
  170 +
  171 + char const* password;
  172 + std::shared_ptr<char> password_alloc;
  173 + bool linearize;
  174 + bool decrypt;
  175 + int split_pages;
  176 + bool verbose;
  177 + bool progress;
  178 + bool suppress_warnings;
  179 + bool copy_encryption;
  180 + char const* encryption_file;
  181 + char const* encryption_file_password;
  182 + bool encrypt;
  183 + bool password_is_hex_key;
  184 + bool suppress_password_recovery;
  185 + password_mode_e password_mode;
  186 + bool allow_insecure;
  187 + bool allow_weak_crypto;
  188 + std::string user_password;
  189 + std::string owner_password;
  190 + int keylen;
  191 + bool r2_print;
  192 + bool r2_modify;
  193 + bool r2_extract;
  194 + bool r2_annotate;
  195 + bool r3_accessibility;
  196 + bool r3_extract;
  197 + bool r3_assemble;
  198 + bool r3_annotate_and_form;
  199 + bool r3_form_filling;
  200 + bool r3_modify_other;
  201 + qpdf_r3_print_e r3_print;
  202 + bool force_V4;
  203 + bool force_R5;
  204 + bool cleartext_metadata;
  205 + bool use_aes;
  206 + bool stream_data_set;
  207 + qpdf_stream_data_e stream_data_mode;
  208 + bool compress_streams;
  209 + bool compress_streams_set;
  210 + bool recompress_flate;
  211 + bool recompress_flate_set;
  212 + int compression_level;
  213 + qpdf_stream_decode_level_e decode_level;
  214 + bool decode_level_set;
  215 + bool normalize_set;
  216 + bool normalize;
  217 + bool suppress_recovery;
  218 + bool object_stream_set;
  219 + qpdf_object_stream_e object_stream_mode;
  220 + bool ignore_xref_streams;
  221 + bool qdf_mode;
  222 + bool preserve_unreferenced_objects;
  223 + remove_unref_e remove_unreferenced_page_resources;
  224 + bool keep_files_open;
  225 + bool keep_files_open_set;
  226 + size_t keep_files_open_threshold;
  227 + bool newline_before_endstream;
  228 + std::string linearize_pass1;
  229 + bool coalesce_contents;
  230 + bool flatten_annotations;
  231 + int flatten_annotations_required;
  232 + int flatten_annotations_forbidden;
  233 + bool generate_appearances;
  234 + std::string min_version;
  235 + std::string force_version;
  236 + bool show_npages;
  237 + bool deterministic_id;
  238 + bool static_id;
  239 + bool static_aes_iv;
  240 + bool suppress_original_object_id;
  241 + bool show_encryption;
  242 + bool show_encryption_key;
  243 + bool check_linearization;
  244 + bool show_linearization;
  245 + bool show_xref;
  246 + bool show_trailer;
  247 + int show_obj;
  248 + int show_gen;
  249 + bool show_raw_stream_data;
  250 + bool show_filtered_stream_data;
  251 + bool show_pages;
  252 + bool show_page_images;
  253 + size_t collate;
  254 + bool flatten_rotation;
  255 + bool list_attachments;
  256 + std::string attachment_to_show;
  257 + std::list<std::string> attachments_to_remove;
  258 + std::list<AddAttachment> attachments_to_add;
  259 + std::list<CopyAttachmentFrom> attachments_to_copy;
  260 + bool json;
  261 + std::set<std::string> json_keys;
  262 + std::set<std::string> json_objects;
  263 + bool check;
  264 + bool optimize_images;
  265 + bool externalize_inline_images;
  266 + bool keep_inline_images;
  267 + bool remove_page_labels;
  268 + size_t oi_min_width;
  269 + size_t oi_min_height;
  270 + size_t oi_min_area;
  271 + size_t ii_min_bytes;
  272 + UnderOverlay underlay;
  273 + UnderOverlay overlay;
  274 + UnderOverlay* under_overlay;
  275 + std::vector<PageSpec> page_specs;
  276 + std::map<std::string, RotationSpec> rotations;
  277 + bool require_outfile;
  278 + bool replace_input;
  279 + bool check_is_encrypted;
  280 + bool check_requires_password;
  281 + char const* infilename;
  282 + char const* outfilename;
  283 + // QXXXQ END-PUBLIC
  284 +
  285 + private:
  286 + class Members
  287 + {
  288 + friend class QPDFJob;
  289 +
  290 + public:
  291 + QPDF_DLL
  292 + ~Members() = default;
  293 +
  294 + private:
  295 + Members();
  296 + Members(Members const&) = delete;
  297 +
  298 + bool warnings;
  299 + bool creates_output;
  300 + std::ostream* out_stream;
  301 + std::ostream* err_stream;
  302 + unsigned long encryption_status;
  303 + };
  304 + PointerHolder<Members> m;
  305 +};
  306 +
  307 +#endif // QPDFJOB_HH
... ...
libqpdf/QPDFArgParser.cc
... ... @@ -24,6 +24,11 @@ QPDFArgParser::Members::Members(
24 24 option_table(nullptr),
25 25 final_check_handler(nullptr)
26 26 {
  27 + // Remove prefix added by libtool for consistency during testing.
  28 + if (strncmp(whoami, "lt-", 3) == 0)
  29 + {
  30 + whoami += 3;
  31 + }
27 32 }
28 33  
29 34 QPDFArgParser::QPDFArgParser(int argc, char* argv[], char const* progname_env) :
... ...
libqpdf/QPDFJob.cc 0 → 100644
  1 +#include <qpdf/QPDFJob.hh>
  2 +
  3 +#include <cstdio>
  4 +#include <cstdlib>
  5 +#include <cstring>
  6 +#include <ctype.h>
  7 +#include <fcntl.h>
  8 +#include <iostream>
  9 +#include <memory>
  10 +
  11 +#include <qpdf/QUtil.hh>
  12 +#include <qpdf/QTC.hh>
  13 +#include <qpdf/ClosedFileInputSource.hh>
  14 +#include <qpdf/FileInputSource.hh>
  15 +#include <qpdf/Pl_StdioFile.hh>
  16 +#include <qpdf/Pl_Discard.hh>
  17 +#include <qpdf/Pl_DCT.hh>
  18 +#include <qpdf/Pl_Count.hh>
  19 +#include <qpdf/Pl_Flate.hh>
  20 +#include <qpdf/PointerHolder.hh>
  21 +
  22 +#include <qpdf/QPDF.hh>
  23 +#include <qpdf/QPDFPageDocumentHelper.hh>
  24 +#include <qpdf/QPDFPageObjectHelper.hh>
  25 +#include <qpdf/QPDFPageLabelDocumentHelper.hh>
  26 +#include <qpdf/QPDFOutlineDocumentHelper.hh>
  27 +#include <qpdf/QPDFAcroFormDocumentHelper.hh>
  28 +#include <qpdf/QPDFExc.hh>
  29 +#include <qpdf/QPDFSystemError.hh>
  30 +#include <qpdf/QPDFCryptoProvider.hh>
  31 +#include <qpdf/QPDFEmbeddedFileDocumentHelper.hh>
  32 +#include <qpdf/QPDFArgParser.hh>
  33 +
  34 +#include <qpdf/QPDFWriter.hh>
  35 +#include <qpdf/QIntC.hh>
  36 +
// QXXXQ temporary for compilation
// File-local stand-ins for values the command-line program owns.
static constexpr int EXIT_ERROR = 2;
// Deliberately not const: may be changed to 0 at runtime.
static int EXIT_WARNING = 3;
static char const* whoami = "qpdf";
// /QXXXQ
  42 +
  43 +namespace
  44 +{
  45 + class ImageOptimizer: public QPDFObjectHandle::StreamDataProvider
  46 + {
  47 + public:
  48 + ImageOptimizer(QPDFJob& o, QPDFObjectHandle& image);
  49 + virtual ~ImageOptimizer()
  50 + {
  51 + }
  52 + virtual void provideStreamData(int objid, int generation,
  53 + Pipeline* pipeline);
  54 + PointerHolder<Pipeline> makePipeline(
  55 + std::string const& description, Pipeline* next);
  56 + bool evaluate(std::string const& description);
  57 +
  58 + private:
  59 + QPDFJob& o;
  60 + QPDFObjectHandle image;
  61 + };
  62 +
  63 + class DiscardContents: public QPDFObjectHandle::ParserCallbacks
  64 + {
  65 + public:
  66 + virtual ~DiscardContents() {}
  67 + virtual void handleObject(QPDFObjectHandle) {}
  68 + virtual void handleEOF() {}
  69 + };
  70 +
  71 + struct QPDFPageData
  72 + {
  73 + QPDFPageData(std::string const& filename, QPDF* qpdf, char const* range);
  74 + QPDFPageData(QPDFPageData const& other, int page);
  75 +
  76 + std::string filename;
  77 + QPDF* qpdf;
  78 + std::vector<QPDFObjectHandle> orig_pages;
  79 + std::vector<int> selected_pages;
  80 + };
  81 +
  82 + class ProgressReporter: public QPDFWriter::ProgressReporter
  83 + {
  84 + public:
  85 + ProgressReporter(char const* filename) :
  86 + filename(filename)
  87 + {
  88 + }
  89 + virtual ~ProgressReporter()
  90 + {
  91 + }
  92 +
  93 + virtual void reportProgress(int);
  94 + private:
  95 + std::string filename;
  96 + };
  97 +}
  98 +
  99 +QPDFPageData::QPDFPageData(std::string const& filename,
  100 + QPDF* qpdf,
  101 + char const* range) :
  102 + filename(filename),
  103 + qpdf(qpdf),
  104 + orig_pages(qpdf->getAllPages())
  105 +{
  106 + try
  107 + {
  108 + this->selected_pages =
  109 + QUtil::parse_numrange(range,
  110 + QIntC::to_int(this->orig_pages.size()));
  111 + }
  112 + catch (std::runtime_error& e)
  113 + {
  114 + throw std::runtime_error(
  115 + "parsing numeric range for " + filename + ": " + e.what());
  116 + }
  117 +}
  118 +
  119 +QPDFPageData::QPDFPageData(QPDFPageData const& other, int page) :
  120 + filename(other.filename),
  121 + qpdf(other.qpdf),
  122 + orig_pages(other.orig_pages)
  123 +{
  124 + this->selected_pages.push_back(page);
  125 +}
  126 +
  127 +void
  128 +ProgressReporter::reportProgress(int percentage)
  129 +{
  130 + std::cout << whoami << ": " << filename << ": write progress: "
  131 + << percentage << "%" << std::endl;
  132 +}
  133 +
  134 +
  135 +QPDFJob::Members::Members() :
  136 + warnings(false),
  137 + creates_output(false),
  138 + out_stream(&std::cout),
  139 + err_stream(&std::cerr),
  140 + encryption_status(0)
  141 +{
  142 +}
  143 +
  144 +QPDFJob::QPDFJob() :
  145 + password(0),
  146 + linearize(false),
  147 + decrypt(false),
  148 + split_pages(0),
  149 + verbose(false),
  150 + progress(false),
  151 + suppress_warnings(false),
  152 + copy_encryption(false),
  153 + encryption_file(0),
  154 + encryption_file_password(0),
  155 + encrypt(false),
  156 + password_is_hex_key(false),
  157 + suppress_password_recovery(false),
  158 + password_mode(pm_auto),
  159 + allow_insecure(false),
  160 + allow_weak_crypto(false),
  161 + keylen(0),
  162 + r2_print(true),
  163 + r2_modify(true),
  164 + r2_extract(true),
  165 + r2_annotate(true),
  166 + r3_accessibility(true),
  167 + r3_extract(true),
  168 + r3_assemble(true),
  169 + r3_annotate_and_form(true),
  170 + r3_form_filling(true),
  171 + r3_modify_other(true),
  172 + r3_print(qpdf_r3p_full),
  173 + force_V4(false),
  174 + force_R5(false),
  175 + cleartext_metadata(false),
  176 + use_aes(false),
  177 + stream_data_set(false),
  178 + stream_data_mode(qpdf_s_compress),
  179 + compress_streams(true),
  180 + compress_streams_set(false),
  181 + recompress_flate(false),
  182 + recompress_flate_set(false),
  183 + compression_level(-1),
  184 + decode_level(qpdf_dl_generalized),
  185 + decode_level_set(false),
  186 + normalize_set(false),
  187 + normalize(false),
  188 + suppress_recovery(false),
  189 + object_stream_set(false),
  190 + object_stream_mode(qpdf_o_preserve),
  191 + ignore_xref_streams(false),
  192 + qdf_mode(false),
  193 + preserve_unreferenced_objects(false),
  194 + remove_unreferenced_page_resources(re_auto),
  195 + keep_files_open(true),
  196 + keep_files_open_set(false),
  197 + keep_files_open_threshold(200), // default known in help and docs
  198 + newline_before_endstream(false),
  199 + coalesce_contents(false),
  200 + flatten_annotations(false),
  201 + flatten_annotations_required(0),
  202 + flatten_annotations_forbidden(an_invisible | an_hidden),
  203 + generate_appearances(false),
  204 + show_npages(false),
  205 + deterministic_id(false),
  206 + static_id(false),
  207 + static_aes_iv(false),
  208 + suppress_original_object_id(false),
  209 + show_encryption(false),
  210 + show_encryption_key(false),
  211 + check_linearization(false),
  212 + show_linearization(false),
  213 + show_xref(false),
  214 + show_trailer(false),
  215 + show_obj(0),
  216 + show_gen(0),
  217 + show_raw_stream_data(false),
  218 + show_filtered_stream_data(false),
  219 + show_pages(false),
  220 + show_page_images(false),
  221 + collate(0),
  222 + flatten_rotation(false),
  223 + list_attachments(false),
  224 + json(false),
  225 + check(false),
  226 + optimize_images(false),
  227 + externalize_inline_images(false),
  228 + keep_inline_images(false),
  229 + remove_page_labels(false),
  230 + oi_min_width(128), // Default values for these
  231 + oi_min_height(128), // oi flags are in --help
  232 + oi_min_area(16384), // and in the manual.
  233 + ii_min_bytes(1024), //
  234 + underlay("underlay"),
  235 + overlay("overlay"),
  236 + under_overlay(0),
  237 + require_outfile(true),
  238 + replace_input(false),
  239 + check_is_encrypted(false),
  240 + check_requires_password(false),
  241 + infilename(0),
  242 + outfilename(0),
  243 + m(new Members())
  244 +{
  245 +}
  246 +
  247 +void
  248 +QPDFJob::setOutputStreams(std::ostream* out, std::ostream* err)
  249 +{
  250 + this->m->out_stream = out ? out : &std::cout;
  251 + this->m->err_stream = err ? err : &std::cerr;
  252 +}
  253 +
  254 +static void parse_version(std::string const& full_version_string,
  255 + std::string& version, int& extension_level)
  256 +{
  257 + PointerHolder<char> vp(true, QUtil::copy_string(full_version_string));
  258 + char* v = vp.getPointer();
  259 + char* p1 = strchr(v, '.');
  260 + char* p2 = (p1 ? strchr(1 + p1, '.') : 0);
  261 + if (p2 && *(p2 + 1))
  262 + {
  263 + *p2++ = '\0';
  264 + extension_level = QUtil::string_to_int(p2);
  265 + }
  266 + version = v;
  267 +}
  268 +
  269 +static void set_qpdf_options(QPDF& pdf, QPDFJob& o)
  270 +{
  271 + if (o.ignore_xref_streams)
  272 + {
  273 + pdf.setIgnoreXRefStreams(true);
  274 + }
  275 + if (o.suppress_recovery)
  276 + {
  277 + pdf.setAttemptRecovery(false);
  278 + }
  279 + if (o.password_is_hex_key)
  280 + {
  281 + pdf.setPasswordIsHexKey(true);
  282 + }
  283 + if (o.suppress_warnings)
  284 + {
  285 + pdf.setSuppressWarnings(true);
  286 + }
  287 +}
  288 +
// Describe a permission flag: "allowed" when v is true, otherwise
// "not allowed".
static std::string show_bool(bool v)
{
    if (v)
    {
        return "allowed";
    }
    return "not allowed";
}
  293 +
  294 +static std::string show_encryption_method(QPDF::encryption_method_e method)
  295 +{
  296 + std::string result = "unknown";
  297 + switch (method)
  298 + {
  299 + case QPDF::e_none:
  300 + result = "none";
  301 + break;
  302 + case QPDF::e_unknown:
  303 + result = "unknown";
  304 + break;
  305 + case QPDF::e_rc4:
  306 + result = "RC4";
  307 + break;
  308 + case QPDF::e_aes:
  309 + result = "AESv2";
  310 + break;
  311 + case QPDF::e_aesv3:
  312 + result = "AESv3";
  313 + break;
  314 + // no default so gcc will warn for missing case
  315 + }
  316 + return result;
  317 +}
  318 +
  319 +static void show_encryption(QPDF& pdf, QPDFJob& o)
  320 +{
  321 + // Extract /P from /Encrypt
  322 + int R = 0;
  323 + int P = 0;
  324 + int V = 0;
  325 + QPDF::encryption_method_e stream_method = QPDF::e_unknown;
  326 + QPDF::encryption_method_e string_method = QPDF::e_unknown;
  327 + QPDF::encryption_method_e file_method = QPDF::e_unknown;
  328 + if (! pdf.isEncrypted(R, P, V,
  329 + stream_method, string_method, file_method))
  330 + {
  331 + std::cout << "File is not encrypted" << std::endl;
  332 + }
  333 + else
  334 + {
  335 + std::cout << "R = " << R << std::endl;
  336 + std::cout << "P = " << P << std::endl;
  337 + std::string user_password = pdf.getTrimmedUserPassword();
  338 + std::string encryption_key = pdf.getEncryptionKey();
  339 + std::cout << "User password = " << user_password << std::endl;
  340 + if (o.show_encryption_key)
  341 + {
  342 + std::cout << "Encryption key = "
  343 + << QUtil::hex_encode(encryption_key) << std::endl;
  344 + }
  345 + if (pdf.ownerPasswordMatched())
  346 + {
  347 + std::cout << "Supplied password is owner password" << std::endl;
  348 + }
  349 + if (pdf.userPasswordMatched())
  350 + {
  351 + std::cout << "Supplied password is user password" << std::endl;
  352 + }
  353 + std::cout << "extract for accessibility: "
  354 + << show_bool(pdf.allowAccessibility()) << std::endl
  355 + << "extract for any purpose: "
  356 + << show_bool(pdf.allowExtractAll()) << std::endl
  357 + << "print low resolution: "
  358 + << show_bool(pdf.allowPrintLowRes()) << std::endl
  359 + << "print high resolution: "
  360 + << show_bool(pdf.allowPrintHighRes()) << std::endl
  361 + << "modify document assembly: "
  362 + << show_bool(pdf.allowModifyAssembly()) << std::endl
  363 + << "modify forms: "
  364 + << show_bool(pdf.allowModifyForm()) << std::endl
  365 + << "modify annotations: "
  366 + << show_bool(pdf.allowModifyAnnotation()) << std::endl
  367 + << "modify other: "
  368 + << show_bool(pdf.allowModifyOther()) << std::endl
  369 + << "modify anything: "
  370 + << show_bool(pdf.allowModifyAll()) << std::endl;
  371 + if (V >= 4)
  372 + {
  373 + std::cout << "stream encryption method: "
  374 + << show_encryption_method(stream_method) << std::endl
  375 + << "string encryption method: "
  376 + << show_encryption_method(string_method) << std::endl
  377 + << "file encryption method: "
  378 + << show_encryption_method(file_method) << std::endl;
  379 + }
  380 + }
  381 +}
  382 +
  383 +static void do_check(QPDF& pdf, QPDFJob& o, int& exit_code)
  384 +{
  385 + // Code below may set okay to false but not to true.
  386 + // We assume okay until we prove otherwise but may
  387 + // continue to perform additional checks after finding
  388 + // errors.
  389 + bool okay = true;
  390 + bool warnings = false;
  391 + std::cout << "checking " << o.infilename << std::endl;
  392 + try
  393 + {
  394 + int extension_level = pdf.getExtensionLevel();
  395 + std::cout << "PDF Version: " << pdf.getPDFVersion();
  396 + if (extension_level > 0)
  397 + {
  398 + std::cout << " extension level "
  399 + << pdf.getExtensionLevel();
  400 + }
  401 + std::cout << std::endl;
  402 + show_encryption(pdf, o);
  403 + if (pdf.isLinearized())
  404 + {
  405 + std::cout << "File is linearized\n";
  406 + // any errors or warnings are reported by
  407 + // checkLinearization(). We treat all issues reported here
  408 + // as warnings.
  409 + if (! pdf.checkLinearization())
  410 + {
  411 + warnings = true;
  412 + }
  413 + }
  414 + else
  415 + {
  416 + std::cout << "File is not linearized\n";
  417 + }
  418 +
  419 + // Write the file no nowhere, uncompressing
  420 + // streams. This causes full file traversal and
  421 + // decoding of all streams we can decode.
  422 + QPDFWriter w(pdf);
  423 + Pl_Discard discard;
  424 + w.setOutputPipeline(&discard);
  425 + w.setDecodeLevel(qpdf_dl_all);
  426 + w.write();
  427 +
  428 + // Parse all content streams
  429 + QPDFPageDocumentHelper dh(pdf);
  430 + std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
  431 + DiscardContents discard_contents;
  432 + int pageno = 0;
  433 + for (std::vector<QPDFPageObjectHelper>::iterator iter =
  434 + pages.begin();
  435 + iter != pages.end(); ++iter)
  436 + {
  437 + QPDFPageObjectHelper& page(*iter);
  438 + ++pageno;
  439 + try
  440 + {
  441 + page.parseContents(&discard_contents);
  442 + }
  443 + catch (QPDFExc& e)
  444 + {
  445 + okay = false;
  446 + std::cerr << "ERROR: page " << pageno << ": "
  447 + << e.what() << std::endl;
  448 + }
  449 + }
  450 + }
  451 + catch (std::exception& e)
  452 + {
  453 + std::cerr << "ERROR: " << e.what() << std::endl;
  454 + okay = false;
  455 + }
  456 + if (okay)
  457 + {
  458 + if ((! pdf.getWarnings().empty()) || warnings)
  459 + {
  460 + exit_code = EXIT_WARNING;
  461 + }
  462 + else
  463 + {
  464 + std::cout << "No syntax or stream encoding errors"
  465 + << " found; the file may still contain"
  466 + << std::endl
  467 + << "errors that qpdf cannot detect"
  468 + << std::endl;
  469 + }
  470 + }
  471 + else
  472 + {
  473 + exit_code = EXIT_ERROR;
  474 + }
  475 +}
  476 +
  477 +static void do_show_obj(QPDF& pdf, QPDFJob& o, int& exit_code)
  478 +{
  479 + QPDFObjectHandle obj;
  480 + if (o.show_trailer)
  481 + {
  482 + obj = pdf.getTrailer();
  483 + }
  484 + else
  485 + {
  486 + obj = pdf.getObjectByID(o.show_obj, o.show_gen);
  487 + }
  488 + if (obj.isStream())
  489 + {
  490 + if (o.show_raw_stream_data || o.show_filtered_stream_data)
  491 + {
  492 + bool filter = o.show_filtered_stream_data;
  493 + if (filter &&
  494 + (! obj.pipeStreamData(0, 0, qpdf_dl_all)))
  495 + {
  496 + QTC::TC("qpdf", "qpdf unable to filter");
  497 + std::cerr << "Unable to filter stream data."
  498 + << std::endl;
  499 + exit_code = EXIT_ERROR;
  500 + }
  501 + else
  502 + {
  503 + QUtil::binary_stdout();
  504 + Pl_StdioFile out("stdout", stdout);
  505 + obj.pipeStreamData(
  506 + &out,
  507 + (filter && o.normalize) ? qpdf_ef_normalize : 0,
  508 + filter ? qpdf_dl_all : qpdf_dl_none);
  509 + }
  510 + }
  511 + else
  512 + {
  513 + std::cout
  514 + << "Object is stream. Dictionary:" << std::endl
  515 + << obj.getDict().unparseResolved() << std::endl;
  516 + }
  517 + }
  518 + else
  519 + {
  520 + std::cout << obj.unparseResolved() << std::endl;
  521 + }
  522 +}
  523 +
  524 +static void do_show_pages(QPDF& pdf, QPDFJob& o)
  525 +{
  526 + QPDFPageDocumentHelper dh(pdf);
  527 + std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
  528 + int pageno = 0;
  529 + for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
  530 + iter != pages.end(); ++iter)
  531 + {
  532 + QPDFPageObjectHelper& ph(*iter);
  533 + QPDFObjectHandle page = ph.getObjectHandle();
  534 + ++pageno;
  535 +
  536 + std::cout << "page " << pageno << ": "
  537 + << page.getObjectID() << " "
  538 + << page.getGeneration() << " R" << std::endl;
  539 + if (o.show_page_images)
  540 + {
  541 + std::map<std::string, QPDFObjectHandle> images = ph.getImages();
  542 + if (! images.empty())
  543 + {
  544 + std::cout << " images:" << std::endl;
  545 + for (auto const& iter2: images)
  546 + {
  547 + std::string const& name = iter2.first;
  548 + QPDFObjectHandle image = iter2.second;
  549 + QPDFObjectHandle dict = image.getDict();
  550 + int width =
  551 + dict.getKey("/Width").getIntValueAsInt();
  552 + int height =
  553 + dict.getKey("/Height").getIntValueAsInt();
  554 + std::cout << " " << name << ": "
  555 + << image.unparse()
  556 + << ", " << width << " x " << height
  557 + << std::endl;
  558 + }
  559 + }
  560 + }
  561 +
  562 + std::cout << " content:" << std::endl;
  563 + std::vector<QPDFObjectHandle> content =
  564 + ph.getPageContents();
  565 + for (auto& iter2: content)
  566 + {
  567 + std::cout << " " << iter2.unparse() << std::endl;
  568 + }
  569 + }
  570 +}
  571 +
  572 +static void do_list_attachments(QPDF& pdf, QPDFJob& o)
  573 +{
  574 + QPDFEmbeddedFileDocumentHelper efdh(pdf);
  575 + if (efdh.hasEmbeddedFiles())
  576 + {
  577 + for (auto const& i: efdh.getEmbeddedFiles())
  578 + {
  579 + std::string const& key = i.first;
  580 + auto efoh = i.second;
  581 + std::cout << key << " -> "
  582 + << efoh->getEmbeddedFileStream().getObjGen()
  583 + << std::endl;
  584 + if (o.verbose)
  585 + {
  586 + auto desc = efoh->getDescription();
  587 + if (! desc.empty())
  588 + {
  589 + std::cout << " description: " << desc << std::endl;
  590 + }
  591 + std::cout << " preferred name: " << efoh->getFilename()
  592 + << std::endl;
  593 + std::cout << " all names:" << std::endl;
  594 + for (auto const& i2: efoh->getFilenames())
  595 + {
  596 + std::cout << " " << i2.first << " -> " << i2.second
  597 + << std::endl;
  598 + }
  599 + std::cout << " all data streams:" << std::endl;
  600 + for (auto i2: efoh->getEmbeddedFileStreams().ditems())
  601 + {
  602 + std::cout << " " << i2.first << " -> "
  603 + << i2.second.getObjGen()
  604 + << std::endl;
  605 + }
  606 + }
  607 + }
  608 + }
  609 + else
  610 + {
  611 + std::cout << o.infilename << " has no embedded files" << std::endl;
  612 + }
  613 +}
  614 +
  615 +static void do_show_attachment(QPDF& pdf, QPDFJob& o, int& exit_code)
  616 +{
  617 + QPDFEmbeddedFileDocumentHelper efdh(pdf);
  618 + auto fs = efdh.getEmbeddedFile(o.attachment_to_show);
  619 + if (! fs)
  620 + {
  621 + std::cerr << whoami << ": attachment " << o.attachment_to_show
  622 + << " not found" << std::endl;
  623 + exit_code = EXIT_ERROR;
  624 + return;
  625 + }
  626 + auto efs = fs->getEmbeddedFileStream();
  627 + QUtil::binary_stdout();
  628 + Pl_StdioFile out("stdout", stdout);
  629 + efs.pipeStreamData(&out, 0, qpdf_dl_all);
  630 +}
  631 +
  632 +void
  633 +QPDFJob::parse_object_id(std::string const& objspec,
  634 + bool& trailer, int& obj, int& gen)
  635 +{
  636 + if (objspec == "trailer")
  637 + {
  638 + trailer = true;
  639 + }
  640 + else
  641 + {
  642 + trailer = false;
  643 + obj = QUtil::string_to_int(objspec.c_str());
  644 + size_t comma = objspec.find(',');
  645 + if ((comma != std::string::npos) && (comma + 1 < objspec.length()))
  646 + {
  647 + gen = QUtil::string_to_int(
  648 + objspec.substr(1 + comma, std::string::npos).c_str());
  649 + }
  650 + }
  651 +}
  652 +
  653 +static std::set<QPDFObjGen>
  654 +get_wanted_json_objects(QPDFJob& o)
  655 +{
  656 + std::set<QPDFObjGen> wanted_og;
  657 + for (auto const& iter: o.json_objects)
  658 + {
  659 + bool trailer;
  660 + int obj = 0;
  661 + int gen = 0;
  662 + QPDFJob::parse_object_id(iter, trailer, obj, gen);
  663 + if (obj)
  664 + {
  665 + wanted_og.insert(QPDFObjGen(obj, gen));
  666 + }
  667 + }
  668 + return wanted_og;
  669 +}
  670 +
  671 +static void do_json_objects(QPDF& pdf, QPDFJob& o, JSON& j)
  672 +{
  673 + // Add all objects. Do this first before other code below modifies
  674 + // things by doing stuff like calling
  675 + // pushInheritedAttributesToPage.
  676 + bool all_objects = o.json_objects.empty();
  677 + std::set<QPDFObjGen> wanted_og = get_wanted_json_objects(o);
  678 + JSON j_objects = j.addDictionaryMember("objects", JSON::makeDictionary());
  679 + if (all_objects || o.json_objects.count("trailer"))
  680 + {
  681 + j_objects.addDictionaryMember(
  682 + "trailer", pdf.getTrailer().getJSON(true));
  683 + }
  684 + std::vector<QPDFObjectHandle> objects = pdf.getAllObjects();
  685 + for (std::vector<QPDFObjectHandle>::iterator iter = objects.begin();
  686 + iter != objects.end(); ++iter)
  687 + {
  688 + if (all_objects || wanted_og.count((*iter).getObjGen()))
  689 + {
  690 + j_objects.addDictionaryMember(
  691 + (*iter).unparse(), (*iter).getJSON(true));
  692 + }
  693 + }
  694 +}
  695 +
  696 +static void do_json_objectinfo(QPDF& pdf, QPDFJob& o, JSON& j)
  697 +{
  698 + // Do this first before other code below modifies things by doing
  699 + // stuff like calling pushInheritedAttributesToPage.
  700 + bool all_objects = o.json_objects.empty();
  701 + std::set<QPDFObjGen> wanted_og = get_wanted_json_objects(o);
  702 + JSON j_objectinfo = j.addDictionaryMember(
  703 + "objectinfo", JSON::makeDictionary());
  704 + for (auto& obj: pdf.getAllObjects())
  705 + {
  706 + if (all_objects || wanted_og.count(obj.getObjGen()))
  707 + {
  708 + auto j_details = j_objectinfo.addDictionaryMember(
  709 + obj.unparse(), JSON::makeDictionary());
  710 + auto j_stream = j_details.addDictionaryMember(
  711 + "stream", JSON::makeDictionary());
  712 + bool is_stream = obj.isStream();
  713 + j_stream.addDictionaryMember(
  714 + "is", JSON::makeBool(is_stream));
  715 + j_stream.addDictionaryMember(
  716 + "length",
  717 + (is_stream
  718 + ? obj.getDict().getKey("/Length").getJSON(true)
  719 + : JSON::makeNull()));
  720 + j_stream.addDictionaryMember(
  721 + "filter",
  722 + (is_stream
  723 + ? obj.getDict().getKey("/Filter").getJSON(true)
  724 + : JSON::makeNull()));
  725 + }
  726 + }
  727 +}
  728 +
  729 +static void do_json_pages(QPDF& pdf, QPDFJob& o, JSON& j)
  730 +{
  731 + JSON j_pages = j.addDictionaryMember("pages", JSON::makeArray());
  732 + QPDFPageDocumentHelper pdh(pdf);
  733 + QPDFPageLabelDocumentHelper pldh(pdf);
  734 + QPDFOutlineDocumentHelper odh(pdf);
  735 + pdh.pushInheritedAttributesToPage();
  736 + std::vector<QPDFPageObjectHelper> pages = pdh.getAllPages();
  737 + int pageno = 0;
  738 + for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
  739 + iter != pages.end(); ++iter, ++pageno)
  740 + {
  741 + JSON j_page = j_pages.addArrayElement(JSON::makeDictionary());
  742 + QPDFPageObjectHelper& ph(*iter);
  743 + QPDFObjectHandle page = ph.getObjectHandle();
  744 + j_page.addDictionaryMember("object", page.getJSON());
  745 + JSON j_images = j_page.addDictionaryMember(
  746 + "images", JSON::makeArray());
  747 + std::map<std::string, QPDFObjectHandle> images = ph.getImages();
  748 + for (auto const& iter2: images)
  749 + {
  750 + JSON j_image = j_images.addArrayElement(JSON::makeDictionary());
  751 + j_image.addDictionaryMember(
  752 + "name", JSON::makeString(iter2.first));
  753 + QPDFObjectHandle image = iter2.second;
  754 + QPDFObjectHandle dict = image.getDict();
  755 + j_image.addDictionaryMember("object", image.getJSON());
  756 + j_image.addDictionaryMember(
  757 + "width", dict.getKey("/Width").getJSON());
  758 + j_image.addDictionaryMember(
  759 + "height", dict.getKey("/Height").getJSON());
  760 + j_image.addDictionaryMember(
  761 + "colorspace", dict.getKey("/ColorSpace").getJSON());
  762 + j_image.addDictionaryMember(
  763 + "bitspercomponent", dict.getKey("/BitsPerComponent").getJSON());
  764 + QPDFObjectHandle filters = dict.getKey("/Filter").wrapInArray();
  765 + j_image.addDictionaryMember(
  766 + "filter", filters.getJSON());
  767 + QPDFObjectHandle decode_parms = dict.getKey("/DecodeParms");
  768 + QPDFObjectHandle dp_array;
  769 + if (decode_parms.isArray())
  770 + {
  771 + dp_array = decode_parms;
  772 + }
  773 + else
  774 + {
  775 + dp_array = QPDFObjectHandle::newArray();
  776 + for (int i = 0; i < filters.getArrayNItems(); ++i)
  777 + {
  778 + dp_array.appendItem(decode_parms);
  779 + }
  780 + }
  781 + j_image.addDictionaryMember("decodeparms", dp_array.getJSON());
  782 + j_image.addDictionaryMember(
  783 + "filterable",
  784 + JSON::makeBool(
  785 + image.pipeStreamData(0, 0, o.decode_level, true)));
  786 + }
  787 + j_page.addDictionaryMember("images", j_images);
  788 + JSON j_contents = j_page.addDictionaryMember(
  789 + "contents", JSON::makeArray());
  790 + std::vector<QPDFObjectHandle> content = ph.getPageContents();
  791 + for (auto& iter2: content)
  792 + {
  793 + j_contents.addArrayElement(iter2.getJSON());
  794 + }
  795 + j_page.addDictionaryMember(
  796 + "label", pldh.getLabelForPage(pageno).getJSON());
  797 + JSON j_outlines = j_page.addDictionaryMember(
  798 + "outlines", JSON::makeArray());
  799 + std::vector<QPDFOutlineObjectHelper> outlines =
  800 + odh.getOutlinesForPage(page.getObjGen());
  801 + for (std::vector<QPDFOutlineObjectHelper>::iterator oiter =
  802 + outlines.begin();
  803 + oiter != outlines.end(); ++oiter)
  804 + {
  805 + JSON j_outline = j_outlines.addArrayElement(JSON::makeDictionary());
  806 + j_outline.addDictionaryMember(
  807 + "object", (*oiter).getObjectHandle().getJSON());
  808 + j_outline.addDictionaryMember(
  809 + "title", JSON::makeString((*oiter).getTitle()));
  810 + j_outline.addDictionaryMember(
  811 + "dest", (*oiter).getDest().getJSON(true));
  812 + }
  813 + j_page.addDictionaryMember("pageposfrom1", JSON::makeInt(1 + pageno));
  814 + }
  815 +}
  816 +
  817 +static void do_json_page_labels(QPDF& pdf, QPDFJob& o, JSON& j)
  818 +{
  819 + JSON j_labels = j.addDictionaryMember("pagelabels", JSON::makeArray());
  820 + QPDFPageLabelDocumentHelper pldh(pdf);
  821 + QPDFPageDocumentHelper pdh(pdf);
  822 + std::vector<QPDFPageObjectHelper> pages = pdh.getAllPages();
  823 + if (pldh.hasPageLabels())
  824 + {
  825 + std::vector<QPDFObjectHandle> labels;
  826 + pldh.getLabelsForPageRange(
  827 + 0, QIntC::to_int(pages.size()) - 1, 0, labels);
  828 + for (std::vector<QPDFObjectHandle>::iterator iter = labels.begin();
  829 + iter != labels.end(); ++iter)
  830 + {
  831 + std::vector<QPDFObjectHandle>::iterator next = iter;
  832 + ++next;
  833 + if (next == labels.end())
  834 + {
  835 + // This can't happen, so ignore it. This could only
  836 + // happen if getLabelsForPageRange somehow returned an
  837 + // odd number of items.
  838 + break;
  839 + }
  840 + JSON j_label = j_labels.addArrayElement(JSON::makeDictionary());
  841 + j_label.addDictionaryMember("index", (*iter).getJSON());
  842 + ++iter;
  843 + j_label.addDictionaryMember("label", (*iter).getJSON());
  844 + }
  845 + }
  846 +}
  847 +
  848 +static void add_outlines_to_json(
  849 + std::vector<QPDFOutlineObjectHelper> outlines, JSON& j,
  850 + std::map<QPDFObjGen, int>& page_numbers)
  851 +{
  852 + for (std::vector<QPDFOutlineObjectHelper>::iterator iter = outlines.begin();
  853 + iter != outlines.end(); ++iter)
  854 + {
  855 + QPDFOutlineObjectHelper& ol = *iter;
  856 + JSON jo = j.addArrayElement(JSON::makeDictionary());
  857 + jo.addDictionaryMember("object", ol.getObjectHandle().getJSON());
  858 + jo.addDictionaryMember("title", JSON::makeString(ol.getTitle()));
  859 + jo.addDictionaryMember("dest", ol.getDest().getJSON(true));
  860 + jo.addDictionaryMember("open", JSON::makeBool(ol.getCount() >= 0));
  861 + QPDFObjectHandle page = ol.getDestPage();
  862 + JSON j_destpage = JSON::makeNull();
  863 + if (page.isIndirect())
  864 + {
  865 + QPDFObjGen og = page.getObjGen();
  866 + if (page_numbers.count(og))
  867 + {
  868 + j_destpage = JSON::makeInt(page_numbers[og]);
  869 + }
  870 + }
  871 + jo.addDictionaryMember("destpageposfrom1", j_destpage);
  872 + JSON j_kids = jo.addDictionaryMember("kids", JSON::makeArray());
  873 + add_outlines_to_json(ol.getKids(), j_kids, page_numbers);
  874 + }
  875 +}
  876 +
  877 +static void do_json_outlines(QPDF& pdf, QPDFJob& o, JSON& j)
  878 +{
  879 + std::map<QPDFObjGen, int> page_numbers;
  880 + QPDFPageDocumentHelper dh(pdf);
  881 + std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
  882 + int n = 0;
  883 + for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
  884 + iter != pages.end(); ++iter)
  885 + {
  886 + QPDFObjectHandle oh = (*iter).getObjectHandle();
  887 + page_numbers[oh.getObjGen()] = ++n;
  888 + }
  889 +
  890 + JSON j_outlines = j.addDictionaryMember(
  891 + "outlines", JSON::makeArray());
  892 + QPDFOutlineDocumentHelper odh(pdf);
  893 + add_outlines_to_json(odh.getTopLevelOutlines(), j_outlines, page_numbers);
  894 +}
  895 +
  896 +static void do_json_acroform(QPDF& pdf, QPDFJob& o, JSON& j)
  897 +{
  898 + JSON j_acroform = j.addDictionaryMember(
  899 + "acroform", JSON::makeDictionary());
  900 + QPDFAcroFormDocumentHelper afdh(pdf);
  901 + j_acroform.addDictionaryMember(
  902 + "hasacroform",
  903 + JSON::makeBool(afdh.hasAcroForm()));
  904 + j_acroform.addDictionaryMember(
  905 + "needappearances",
  906 + JSON::makeBool(afdh.getNeedAppearances()));
  907 + JSON j_fields = j_acroform.addDictionaryMember(
  908 + "fields", JSON::makeArray());
  909 + QPDFPageDocumentHelper pdh(pdf);
  910 + std::vector<QPDFPageObjectHelper> pages = pdh.getAllPages();
  911 + int pagepos1 = 0;
  912 + for (std::vector<QPDFPageObjectHelper>::iterator page_iter =
  913 + pages.begin();
  914 + page_iter != pages.end(); ++page_iter)
  915 + {
  916 + ++pagepos1;
  917 + std::vector<QPDFAnnotationObjectHelper> annotations =
  918 + afdh.getWidgetAnnotationsForPage(*page_iter);
  919 + for (std::vector<QPDFAnnotationObjectHelper>::iterator annot_iter =
  920 + annotations.begin();
  921 + annot_iter != annotations.end(); ++annot_iter)
  922 + {
  923 + QPDFAnnotationObjectHelper& aoh = *annot_iter;
  924 + QPDFFormFieldObjectHelper ffh =
  925 + afdh.getFieldForAnnotation(aoh);
  926 + JSON j_field = j_fields.addArrayElement(
  927 + JSON::makeDictionary());
  928 + j_field.addDictionaryMember(
  929 + "object",
  930 + ffh.getObjectHandle().getJSON());
  931 + j_field.addDictionaryMember(
  932 + "parent",
  933 + ffh.getObjectHandle().getKey("/Parent").getJSON());
  934 + j_field.addDictionaryMember(
  935 + "pageposfrom1",
  936 + JSON::makeInt(pagepos1));
  937 + j_field.addDictionaryMember(
  938 + "fieldtype",
  939 + JSON::makeString(ffh.getFieldType()));
  940 + j_field.addDictionaryMember(
  941 + "fieldflags",
  942 + JSON::makeInt(ffh.getFlags()));
  943 + j_field.addDictionaryMember(
  944 + "fullname",
  945 + JSON::makeString(ffh.getFullyQualifiedName()));
  946 + j_field.addDictionaryMember(
  947 + "partialname",
  948 + JSON::makeString(ffh.getPartialName()));
  949 + j_field.addDictionaryMember(
  950 + "alternativename",
  951 + JSON::makeString(ffh.getAlternativeName()));
  952 + j_field.addDictionaryMember(
  953 + "mappingname",
  954 + JSON::makeString(ffh.getMappingName()));
  955 + j_field.addDictionaryMember(
  956 + "value",
  957 + ffh.getValue().getJSON());
  958 + j_field.addDictionaryMember(
  959 + "defaultvalue",
  960 + ffh.getDefaultValue().getJSON());
  961 + j_field.addDictionaryMember(
  962 + "quadding",
  963 + JSON::makeInt(ffh.getQuadding()));
  964 + j_field.addDictionaryMember(
  965 + "ischeckbox",
  966 + JSON::makeBool(ffh.isCheckbox()));
  967 + j_field.addDictionaryMember(
  968 + "isradiobutton",
  969 + JSON::makeBool(ffh.isRadioButton()));
  970 + j_field.addDictionaryMember(
  971 + "ischoice",
  972 + JSON::makeBool(ffh.isChoice()));
  973 + j_field.addDictionaryMember(
  974 + "istext",
  975 + JSON::makeBool(ffh.isText()));
  976 + JSON j_choices = j_field.addDictionaryMember(
  977 + "choices", JSON::makeArray());
  978 + std::vector<std::string> choices = ffh.getChoices();
  979 + for (std::vector<std::string>::iterator iter = choices.begin();
  980 + iter != choices.end(); ++iter)
  981 + {
  982 + j_choices.addArrayElement(JSON::makeString(*iter));
  983 + }
  984 + JSON j_annot = j_field.addDictionaryMember(
  985 + "annotation", JSON::makeDictionary());
  986 + j_annot.addDictionaryMember(
  987 + "object",
  988 + aoh.getObjectHandle().getJSON());
  989 + j_annot.addDictionaryMember(
  990 + "appearancestate",
  991 + JSON::makeString(aoh.getAppearanceState()));
  992 + j_annot.addDictionaryMember(
  993 + "annotationflags",
  994 + JSON::makeInt(aoh.getFlags()));
  995 + }
  996 + }
  997 +}
  998 +
  999 +static void do_json_encrypt(QPDF& pdf, QPDFJob& o, JSON& j)
  1000 +{
  1001 + int R = 0;
  1002 + int P = 0;
  1003 + int V = 0;
  1004 + QPDF::encryption_method_e stream_method = QPDF::e_none;
  1005 + QPDF::encryption_method_e string_method = QPDF::e_none;
  1006 + QPDF::encryption_method_e file_method = QPDF::e_none;
  1007 + bool is_encrypted = pdf.isEncrypted(
  1008 + R, P, V, stream_method, string_method, file_method);
  1009 + JSON j_encrypt = j.addDictionaryMember(
  1010 + "encrypt", JSON::makeDictionary());
  1011 + j_encrypt.addDictionaryMember(
  1012 + "encrypted",
  1013 + JSON::makeBool(is_encrypted));
  1014 + j_encrypt.addDictionaryMember(
  1015 + "userpasswordmatched",
  1016 + JSON::makeBool(is_encrypted && pdf.userPasswordMatched()));
  1017 + j_encrypt.addDictionaryMember(
  1018 + "ownerpasswordmatched",
  1019 + JSON::makeBool(is_encrypted && pdf.ownerPasswordMatched()));
  1020 + JSON j_capabilities = j_encrypt.addDictionaryMember(
  1021 + "capabilities", JSON::makeDictionary());
  1022 + j_capabilities.addDictionaryMember(
  1023 + "accessibility",
  1024 + JSON::makeBool(pdf.allowAccessibility()));
  1025 + j_capabilities.addDictionaryMember(
  1026 + "extract",
  1027 + JSON::makeBool(pdf.allowExtractAll()));
  1028 + j_capabilities.addDictionaryMember(
  1029 + "printlow",
  1030 + JSON::makeBool(pdf.allowPrintLowRes()));
  1031 + j_capabilities.addDictionaryMember(
  1032 + "printhigh",
  1033 + JSON::makeBool(pdf.allowPrintHighRes()));
  1034 + j_capabilities.addDictionaryMember(
  1035 + "modifyassembly",
  1036 + JSON::makeBool(pdf.allowModifyAssembly()));
  1037 + j_capabilities.addDictionaryMember(
  1038 + "modifyforms",
  1039 + JSON::makeBool(pdf.allowModifyForm()));
  1040 + j_capabilities.addDictionaryMember(
  1041 + "moddifyannotations",
  1042 + JSON::makeBool(pdf.allowModifyAnnotation()));
  1043 + j_capabilities.addDictionaryMember(
  1044 + "modifyother",
  1045 + JSON::makeBool(pdf.allowModifyOther()));
  1046 + j_capabilities.addDictionaryMember(
  1047 + "modify",
  1048 + JSON::makeBool(pdf.allowModifyAll()));
  1049 + JSON j_parameters = j_encrypt.addDictionaryMember(
  1050 + "parameters", JSON::makeDictionary());
  1051 + j_parameters.addDictionaryMember("R", JSON::makeInt(R));
  1052 + j_parameters.addDictionaryMember("V", JSON::makeInt(V));
  1053 + j_parameters.addDictionaryMember("P", JSON::makeInt(P));
  1054 + int bits = 0;
  1055 + JSON key = JSON::makeNull();
  1056 + if (is_encrypted)
  1057 + {
  1058 + std::string encryption_key = pdf.getEncryptionKey();
  1059 + bits = QIntC::to_int(encryption_key.length() * 8);
  1060 + if (o.show_encryption_key)
  1061 + {
  1062 + key = JSON::makeString(QUtil::hex_encode(encryption_key));
  1063 + }
  1064 + }
  1065 + j_parameters.addDictionaryMember("bits", JSON::makeInt(bits));
  1066 + j_parameters.addDictionaryMember("key", key);
  1067 + auto fix_method = [is_encrypted](QPDF::encryption_method_e& m) {
  1068 + if (is_encrypted && m == QPDF::e_none)
  1069 + {
  1070 + m = QPDF::e_rc4;
  1071 + }
  1072 + };
  1073 + fix_method(stream_method);
  1074 + fix_method(string_method);
  1075 + fix_method(file_method);
  1076 + std::string s_stream_method = show_encryption_method(stream_method);
  1077 + std::string s_string_method = show_encryption_method(string_method);
  1078 + std::string s_file_method = show_encryption_method(file_method);
  1079 + std::string s_overall_method;
  1080 + if ((stream_method == string_method) &&
  1081 + (stream_method == file_method))
  1082 + {
  1083 + s_overall_method = s_stream_method;
  1084 + }
  1085 + else
  1086 + {
  1087 + s_overall_method = "mixed";
  1088 + }
  1089 + j_parameters.addDictionaryMember(
  1090 + "method", JSON::makeString(s_overall_method));
  1091 + j_parameters.addDictionaryMember(
  1092 + "streammethod", JSON::makeString(s_stream_method));
  1093 + j_parameters.addDictionaryMember(
  1094 + "stringmethod", JSON::makeString(s_string_method));
  1095 + j_parameters.addDictionaryMember(
  1096 + "filemethod", JSON::makeString(s_file_method));
  1097 +}
  1098 +
  1099 +static void do_json_attachments(QPDF& pdf, QPDFJob& o, JSON& j)
  1100 +{
  1101 + JSON j_attachments = j.addDictionaryMember(
  1102 + "attachments", JSON::makeDictionary());
  1103 + QPDFEmbeddedFileDocumentHelper efdh(pdf);
  1104 + for (auto const& iter: efdh.getEmbeddedFiles())
  1105 + {
  1106 + std::string const& key = iter.first;
  1107 + auto fsoh = iter.second;
  1108 + auto j_details = j_attachments.addDictionaryMember(
  1109 + key, JSON::makeDictionary());
  1110 + j_details.addDictionaryMember(
  1111 + "filespec",
  1112 + JSON::makeString(fsoh->getObjectHandle().unparse()));
  1113 + j_details.addDictionaryMember(
  1114 + "preferredname", JSON::makeString(fsoh->getFilename()));
  1115 + j_details.addDictionaryMember(
  1116 + "preferredcontents",
  1117 + JSON::makeString(fsoh->getEmbeddedFileStream().unparse()));
  1118 + }
  1119 +}
  1120 +
  1121 +JSON
  1122 +QPDFJob::json_schema(std::set<std::string>* keys)
  1123 +{
  1124 + // Style: use all lower-case keys with no dashes or underscores.
  1125 + // Choose array or dictionary based on indexing. For example, we
  1126 + // use a dictionary for objects because we want to index by object
  1127 + // ID and an array for pages because we want to index by position.
  1128 + // The pages in the pages array contain references back to the
  1129 + // original object, which can be resolved in the objects
  1130 + // dictionary. When a PDF construct that maps back to an original
  1131 + // object is represented separately, use "object" as the key that
  1132 + // references the original object.
  1133 +
  1134 + // This JSON object doubles as a schema and as documentation for
  1135 + // our JSON output. Any schema mismatch is a bug in qpdf. This
  1136 + // helps to enforce our policy of consistently providing a known
  1137 + // structure where every documented key will always be present,
  1138 + // which makes it easier to consume our JSON. This is discussed in
  1139 + // more depth in the manual.
  1140 + JSON schema = JSON::makeDictionary();
  1141 + schema.addDictionaryMember(
  1142 + "version", JSON::makeString(
  1143 + "JSON format serial number; increased for non-compatible changes"));
  1144 + JSON j_params = schema.addDictionaryMember(
  1145 + "parameters", JSON::makeDictionary());
  1146 + j_params.addDictionaryMember(
  1147 + "decodelevel", JSON::makeString(
  1148 + "decode level used to determine stream filterability"));
  1149 +
  1150 + bool all_keys = ((keys == 0) || keys->empty());
  1151 +
  1152 + // The list of selectable top-level keys id duplicated in three
  1153 + // places: json_schema, do_json, and initOptionTable.
  1154 + if (all_keys || keys->count("objects"))
  1155 + {
  1156 + schema.addDictionaryMember(
  1157 + "objects", JSON::makeString(
  1158 + "dictionary of original objects;"
  1159 + " keys are 'trailer' or 'n n R'"));
  1160 + }
  1161 + if (all_keys || keys->count("objectinfo"))
  1162 + {
  1163 + JSON objectinfo = schema.addDictionaryMember(
  1164 + "objectinfo", JSON::makeDictionary());
  1165 + JSON details = objectinfo.addDictionaryMember(
  1166 + "<object-id>", JSON::makeDictionary());
  1167 + JSON stream = details.addDictionaryMember(
  1168 + "stream", JSON::makeDictionary());
  1169 + stream.addDictionaryMember(
  1170 + "is",
  1171 + JSON::makeString("whether the object is a stream"));
  1172 + stream.addDictionaryMember(
  1173 + "length",
  1174 + JSON::makeString("if stream, its length, otherwise null"));
  1175 + stream.addDictionaryMember(
  1176 + "filter",
  1177 + JSON::makeString("if stream, its filters, otherwise null"));
  1178 + }
  1179 + if (all_keys || keys->count("pages"))
  1180 + {
  1181 + JSON page = schema.addDictionaryMember("pages", JSON::makeArray()).
  1182 + addArrayElement(JSON::makeDictionary());
  1183 + page.addDictionaryMember(
  1184 + "object",
  1185 + JSON::makeString("reference to original page object"));
  1186 + JSON image = page.addDictionaryMember("images", JSON::makeArray()).
  1187 + addArrayElement(JSON::makeDictionary());
  1188 + image.addDictionaryMember(
  1189 + "name",
  1190 + JSON::makeString("name of image in XObject table"));
  1191 + image.addDictionaryMember(
  1192 + "object",
  1193 + JSON::makeString("reference to image stream"));
  1194 + image.addDictionaryMember(
  1195 + "width",
  1196 + JSON::makeString("image width"));
  1197 + image.addDictionaryMember(
  1198 + "height",
  1199 + JSON::makeString("image height"));
  1200 + image.addDictionaryMember(
  1201 + "colorspace",
  1202 + JSON::makeString("color space"));
  1203 + image.addDictionaryMember(
  1204 + "bitspercomponent",
  1205 + JSON::makeString("bits per component"));
  1206 + image.addDictionaryMember("filter", JSON::makeArray()).
  1207 + addArrayElement(
  1208 + JSON::makeString("filters applied to image data"));
  1209 + image.addDictionaryMember("decodeparms", JSON::makeArray()).
  1210 + addArrayElement(
  1211 + JSON::makeString("decode parameters for image data"));
  1212 + image.addDictionaryMember(
  1213 + "filterable",
  1214 + JSON::makeString("whether image data can be decoded"
  1215 + " using the decode level qpdf was invoked with"));
  1216 + page.addDictionaryMember("contents", JSON::makeArray()).
  1217 + addArrayElement(
  1218 + JSON::makeString("reference to each content stream"));
  1219 + page.addDictionaryMember(
  1220 + "label",
  1221 + JSON::makeString("page label dictionary, or null if none"));
  1222 + JSON outline = page.addDictionaryMember("outlines", JSON::makeArray()).
  1223 + addArrayElement(JSON::makeDictionary());
  1224 + outline.addDictionaryMember(
  1225 + "object",
  1226 + JSON::makeString("reference to outline that targets this page"));
  1227 + outline.addDictionaryMember(
  1228 + "title",
  1229 + JSON::makeString("outline title"));
  1230 + outline.addDictionaryMember(
  1231 + "dest",
  1232 + JSON::makeString("outline destination dictionary"));
  1233 + page.addDictionaryMember(
  1234 + "pageposfrom1",
  1235 + JSON::makeString("position of page in document numbering from 1"));
  1236 + }
  1237 + if (all_keys || keys->count("pagelabels"))
  1238 + {
  1239 + JSON labels = schema.addDictionaryMember(
  1240 + "pagelabels", JSON::makeArray()).
  1241 + addArrayElement(JSON::makeDictionary());
  1242 + labels.addDictionaryMember(
  1243 + "index",
  1244 + JSON::makeString("starting page position starting from zero"));
  1245 + labels.addDictionaryMember(
  1246 + "label",
  1247 + JSON::makeString("page label dictionary"));
  1248 + }
  1249 + if (all_keys || keys->count("outlines"))
  1250 + {
  1251 + JSON outlines = schema.addDictionaryMember(
  1252 + "outlines", JSON::makeArray()).
  1253 + addArrayElement(JSON::makeDictionary());
  1254 + outlines.addDictionaryMember(
  1255 + "object",
  1256 + JSON::makeString("reference to this outline"));
  1257 + outlines.addDictionaryMember(
  1258 + "title",
  1259 + JSON::makeString("outline title"));
  1260 + outlines.addDictionaryMember(
  1261 + "dest",
  1262 + JSON::makeString("outline destination dictionary"));
  1263 + outlines.addDictionaryMember(
  1264 + "kids",
  1265 + JSON::makeString("array of descendent outlines"));
  1266 + outlines.addDictionaryMember(
  1267 + "open",
  1268 + JSON::makeString("whether the outline is displayed expanded"));
  1269 + outlines.addDictionaryMember(
  1270 + "destpageposfrom1",
  1271 + JSON::makeString("position of destination page in document"
  1272 + " numbered from 1; null if not known"));
  1273 + }
  1274 + if (all_keys || keys->count("acroform"))
  1275 + {
  1276 + JSON acroform = schema.addDictionaryMember(
  1277 + "acroform", JSON::makeDictionary());
  1278 + acroform.addDictionaryMember(
  1279 + "hasacroform",
  1280 + JSON::makeString("whether the document has interactive forms"));
  1281 + acroform.addDictionaryMember(
  1282 + "needappearances",
  1283 + JSON::makeString("whether the form fields' appearance"
  1284 + " streams need to be regenerated"));
  1285 + JSON fields = acroform.addDictionaryMember(
  1286 + "fields", JSON::makeArray()).
  1287 + addArrayElement(JSON::makeDictionary());
  1288 + fields.addDictionaryMember(
  1289 + "object",
  1290 + JSON::makeString("reference to this form field"));
  1291 + fields.addDictionaryMember(
  1292 + "parent",
  1293 + JSON::makeString("reference to this field's parent"));
  1294 + fields.addDictionaryMember(
  1295 + "pageposfrom1",
  1296 + JSON::makeString("position of containing page numbered from 1"));
  1297 + fields.addDictionaryMember(
  1298 + "fieldtype",
  1299 + JSON::makeString("field type"));
  1300 + fields.addDictionaryMember(
  1301 + "fieldflags",
  1302 + JSON::makeString(
  1303 + "form field flags from /Ff --"
  1304 + " see pdf_form_field_flag_e in qpdf/Constants.h"));
  1305 + fields.addDictionaryMember(
  1306 + "fullname",
  1307 + JSON::makeString("full name of field"));
  1308 + fields.addDictionaryMember(
  1309 + "partialname",
  1310 + JSON::makeString("partial name of field"));
  1311 + fields.addDictionaryMember(
  1312 + "alternativename",
  1313 + JSON::makeString(
  1314 + "alternative name of field --"
  1315 + " this is the one usually shown to users"));
  1316 + fields.addDictionaryMember(
  1317 + "mappingname",
  1318 + JSON::makeString("mapping name of field"));
  1319 + fields.addDictionaryMember(
  1320 + "value",
  1321 + JSON::makeString("value of field"));
  1322 + fields.addDictionaryMember(
  1323 + "defaultvalue",
  1324 + JSON::makeString("default value of field"));
  1325 + fields.addDictionaryMember(
  1326 + "quadding",
  1327 + JSON::makeString(
  1328 + "field quadding --"
  1329 + " number indicating left, center, or right"));
  1330 + fields.addDictionaryMember(
  1331 + "ischeckbox",
  1332 + JSON::makeString("whether field is a checkbox"));
  1333 + fields.addDictionaryMember(
  1334 + "isradiobutton",
  1335 + JSON::makeString("whether field is a radio button --"
  1336 + " buttons in a single group share a parent"));
  1337 + fields.addDictionaryMember(
  1338 + "ischoice",
  1339 + JSON::makeString("whether field is a list, combo, or dropdown"));
  1340 + fields.addDictionaryMember(
  1341 + "istext",
  1342 + JSON::makeString("whether field is a text field"));
  1343 + JSON j_choices = fields.addDictionaryMember(
  1344 + "choices",
  1345 + JSON::makeString("for choices fields, the list of"
  1346 + " choices presented to the user"));
  1347 + JSON annotation = fields.addDictionaryMember(
  1348 + "annotation", JSON::makeDictionary());
  1349 + annotation.addDictionaryMember(
  1350 + "object",
  1351 + JSON::makeString("reference to the annotation object"));
  1352 + annotation.addDictionaryMember(
  1353 + "appearancestate",
  1354 + JSON::makeString("appearance state --"
  1355 + " can be used to determine value for"
  1356 + " checkboxes and radio buttons"));
  1357 + annotation.addDictionaryMember(
  1358 + "annotationflags",
  1359 + JSON::makeString(
  1360 + "annotation flags from /F --"
  1361 + " see pdf_annotation_flag_e in qpdf/Constants.h"));
  1362 + }
  1363 + if (all_keys || keys->count("encrypt"))
  1364 + {
  1365 + JSON encrypt = schema.addDictionaryMember(
  1366 + "encrypt", JSON::makeDictionary());
  1367 + encrypt.addDictionaryMember(
  1368 + "encrypted",
  1369 + JSON::makeString("whether the document is encrypted"));
  1370 + encrypt.addDictionaryMember(
  1371 + "userpasswordmatched",
  1372 + JSON::makeString("whether supplied password matched user password;"
  1373 + " always false for non-encrypted files"));
  1374 + encrypt.addDictionaryMember(
  1375 + "ownerpasswordmatched",
  1376 + JSON::makeString("whether supplied password matched owner password;"
  1377 + " always false for non-encrypted files"));
  1378 + JSON capabilities = encrypt.addDictionaryMember(
  1379 + "capabilities", JSON::makeDictionary());
  1380 + capabilities.addDictionaryMember(
  1381 + "accessibility",
  1382 + JSON::makeString("allow extraction for accessibility?"));
  1383 + capabilities.addDictionaryMember(
  1384 + "extract",
  1385 + JSON::makeString("allow extraction?"));
  1386 + capabilities.addDictionaryMember(
  1387 + "printlow",
  1388 + JSON::makeString("allow low resolution printing?"));
  1389 + capabilities.addDictionaryMember(
  1390 + "printhigh",
  1391 + JSON::makeString("allow high resolution printing?"));
  1392 + capabilities.addDictionaryMember(
  1393 + "modifyassembly",
  1394 + JSON::makeString("allow modifying document assembly?"));
  1395 + capabilities.addDictionaryMember(
  1396 + "modifyforms",
  1397 + JSON::makeString("allow modifying forms?"));
  1398 + capabilities.addDictionaryMember(
  1399 + "moddifyannotations",
  1400 + JSON::makeString("allow modifying annotations?"));
  1401 + capabilities.addDictionaryMember(
  1402 + "modifyother",
  1403 + JSON::makeString("allow other modifications?"));
  1404 + capabilities.addDictionaryMember(
  1405 + "modify",
  1406 + JSON::makeString("allow all modifications?"));
  1407 +
  1408 + JSON parameters = encrypt.addDictionaryMember(
  1409 + "parameters", JSON::makeDictionary());
  1410 + parameters.addDictionaryMember(
  1411 + "R",
  1412 + JSON::makeString("R value from Encrypt dictionary"));
  1413 + parameters.addDictionaryMember(
  1414 + "V",
  1415 + JSON::makeString("V value from Encrypt dictionary"));
  1416 + parameters.addDictionaryMember(
  1417 + "P",
  1418 + JSON::makeString("P value from Encrypt dictionary"));
  1419 + parameters.addDictionaryMember(
  1420 + "bits",
  1421 + JSON::makeString("encryption key bit length"));
  1422 + parameters.addDictionaryMember(
  1423 + "key",
  1424 + JSON::makeString("encryption key; will be null"
  1425 + " unless --show-encryption-key was specified"));
  1426 + parameters.addDictionaryMember(
  1427 + "method",
  1428 + JSON::makeString("overall encryption method:"
  1429 + " none, mixed, RC4, AESv2, AESv3"));
  1430 + parameters.addDictionaryMember(
  1431 + "streammethod",
  1432 + JSON::makeString("encryption method for streams"));
  1433 + parameters.addDictionaryMember(
  1434 + "stringmethod",
  1435 + JSON::makeString("encryption method for string"));
  1436 + parameters.addDictionaryMember(
  1437 + "filemethod",
  1438 + JSON::makeString("encryption method for attachments"));
  1439 + }
  1440 + if (all_keys || keys->count("attachments"))
  1441 + {
  1442 + JSON attachments = schema.addDictionaryMember(
  1443 + "attachments", JSON::makeDictionary());
  1444 + JSON details = attachments.addDictionaryMember(
  1445 + "<attachment-key>", JSON::makeDictionary());
  1446 + details.addDictionaryMember(
  1447 + "filespec",
  1448 + JSON::makeString("object containing the file spec"));
  1449 + details.addDictionaryMember(
  1450 + "preferredname",
  1451 + JSON::makeString("most preferred file name"));
  1452 + details.addDictionaryMember(
  1453 + "preferredcontents",
  1454 + JSON::makeString("most preferred embedded file stream"));
  1455 + }
  1456 + return schema;
  1457 +}
  1458 +
  1459 +static void do_json(QPDF& pdf, QPDFJob& o)
  1460 +{
  1461 + JSON j = JSON::makeDictionary();
  1462 + // This version is updated every time a non-backward-compatible
  1463 + // change is made to the JSON format. Clients of the JSON are to
  1464 + // ignore unrecognized keys, so we only update the version of a
  1465 + // key disappears or if its value changes meaning.
  1466 + j.addDictionaryMember("version", JSON::makeInt(1));
  1467 + JSON j_params = j.addDictionaryMember(
  1468 + "parameters", JSON::makeDictionary());
  1469 + std::string decode_level_str;
  1470 + switch (o.decode_level)
  1471 + {
  1472 + case qpdf_dl_none:
  1473 + decode_level_str = "none";
  1474 + break;
  1475 + case qpdf_dl_generalized:
  1476 + decode_level_str = "generalized";
  1477 + break;
  1478 + case qpdf_dl_specialized:
  1479 + decode_level_str = "specialized";
  1480 + break;
  1481 + case qpdf_dl_all:
  1482 + decode_level_str = "all";
  1483 + break;
  1484 + }
  1485 + j_params.addDictionaryMember(
  1486 + "decodelevel", JSON::makeString(decode_level_str));
  1487 +
  1488 + bool all_keys = o.json_keys.empty();
  1489 + // The list of selectable top-level keys id duplicated in three
  1490 + // places: json_schema, do_json, and initOptionTable.
  1491 + if (all_keys || o.json_keys.count("objects"))
  1492 + {
  1493 + do_json_objects(pdf, o, j);
  1494 + }
  1495 + if (all_keys || o.json_keys.count("objectinfo"))
  1496 + {
  1497 + do_json_objectinfo(pdf, o, j);
  1498 + }
  1499 + if (all_keys || o.json_keys.count("pages"))
  1500 + {
  1501 + do_json_pages(pdf, o, j);
  1502 + }
  1503 + if (all_keys || o.json_keys.count("pagelabels"))
  1504 + {
  1505 + do_json_page_labels(pdf, o, j);
  1506 + }
  1507 + if (all_keys || o.json_keys.count("outlines"))
  1508 + {
  1509 + do_json_outlines(pdf, o, j);
  1510 + }
  1511 + if (all_keys || o.json_keys.count("acroform"))
  1512 + {
  1513 + do_json_acroform(pdf, o, j);
  1514 + }
  1515 + if (all_keys || o.json_keys.count("encrypt"))
  1516 + {
  1517 + do_json_encrypt(pdf, o, j);
  1518 + }
  1519 + if (all_keys || o.json_keys.count("attachments"))
  1520 + {
  1521 + do_json_attachments(pdf, o, j);
  1522 + }
  1523 +
  1524 + // Check against schema
  1525 +
  1526 + JSON schema = QPDFJob::json_schema(&o.json_keys);
  1527 + std::list<std::string> errors;
  1528 + if (! j.checkSchema(schema, errors))
  1529 + {
  1530 + std::cerr
  1531 + << whoami << " didn't create JSON that complies with its own\n\
  1532 +rules. Please report this as a bug at\n\
  1533 + https://github.com/qpdf/qpdf/issues/new\n\
  1534 +ideally with the file that caused the error and the output below. Thanks!\n\
  1535 +\n";
  1536 + for (std::list<std::string>::iterator iter = errors.begin();
  1537 + iter != errors.end(); ++iter)
  1538 + {
  1539 + std::cerr << (*iter) << std::endl;
  1540 + }
  1541 + }
  1542 +
  1543 + std::cout << j.unparse() << std::endl;
  1544 +}
  1545 +
  1546 +static void do_inspection(QPDF& pdf, QPDFJob& o)
  1547 +{
  1548 + int exit_code = 0;
  1549 + if (o.check)
  1550 + {
  1551 + do_check(pdf, o, exit_code);
  1552 + }
  1553 + if (o.json)
  1554 + {
  1555 + do_json(pdf, o);
  1556 + }
  1557 + if (o.show_npages)
  1558 + {
  1559 + QTC::TC("qpdf", "qpdf npages");
  1560 + std::cout << pdf.getRoot().getKey("/Pages").
  1561 + getKey("/Count").getIntValue() << std::endl;
  1562 + }
  1563 + if (o.show_encryption)
  1564 + {
  1565 + show_encryption(pdf, o);
  1566 + }
  1567 + if (o.check_linearization)
  1568 + {
  1569 + if (pdf.checkLinearization())
  1570 + {
  1571 + std::cout << o.infilename << ": no linearization errors"
  1572 + << std::endl;
  1573 + }
  1574 + else if (exit_code != EXIT_ERROR)
  1575 + {
  1576 + exit_code = EXIT_WARNING;
  1577 + }
  1578 + }
  1579 + if (o.show_linearization)
  1580 + {
  1581 + if (pdf.isLinearized())
  1582 + {
  1583 + pdf.showLinearizationData();
  1584 + }
  1585 + else
  1586 + {
  1587 + std::cout << o.infilename << " is not linearized"
  1588 + << std::endl;
  1589 + }
  1590 + }
  1591 + if (o.show_xref)
  1592 + {
  1593 + pdf.showXRefTable();
  1594 + }
  1595 + if ((o.show_obj > 0) || o.show_trailer)
  1596 + {
  1597 + do_show_obj(pdf, o, exit_code);
  1598 + }
  1599 + if (o.show_pages)
  1600 + {
  1601 + do_show_pages(pdf, o);
  1602 + }
  1603 + if (o.list_attachments)
  1604 + {
  1605 + do_list_attachments(pdf, o);
  1606 + }
  1607 + if (! o.attachment_to_show.empty())
  1608 + {
  1609 + do_show_attachment(pdf, o, exit_code);
  1610 + }
  1611 + if ((! pdf.getWarnings().empty()) && (exit_code != EXIT_ERROR))
  1612 + {
  1613 + std::cerr << whoami
  1614 + << ": operation succeeded with warnings" << std::endl;
  1615 + exit_code = EXIT_WARNING;
  1616 + }
  1617 + if (exit_code)
  1618 + {
  1619 + exit(exit_code); // QXXXQ
  1620 + }
  1621 +}
  1622 +
  1623 +
  1624 +ImageOptimizer::ImageOptimizer(QPDFJob& o, QPDFObjectHandle& image) :
  1625 + o(o),
  1626 + image(image)
  1627 +{
  1628 +}
  1629 +
  1630 +PointerHolder<Pipeline>
  1631 +ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
  1632 +{
  1633 + PointerHolder<Pipeline> result;
  1634 + QPDFObjectHandle dict = image.getDict();
  1635 + QPDFObjectHandle w_obj = dict.getKey("/Width");
  1636 + QPDFObjectHandle h_obj = dict.getKey("/Height");
  1637 + QPDFObjectHandle colorspace_obj = dict.getKey("/ColorSpace");
  1638 + if (! (w_obj.isNumber() && h_obj.isNumber()))
  1639 + {
  1640 + if (o.verbose && (! description.empty()))
  1641 + {
  1642 + std::cout << whoami << ": " << description
  1643 + << ": not optimizing because image dictionary"
  1644 + << " is missing required keys" << std::endl;
  1645 + }
  1646 + return result;
  1647 + }
  1648 + QPDFObjectHandle components_obj = dict.getKey("/BitsPerComponent");
  1649 + if (! (components_obj.isInteger() && (components_obj.getIntValue() == 8)))
  1650 + {
  1651 + QTC::TC("qpdf", "qpdf image optimize bits per component");
  1652 + if (o.verbose && (! description.empty()))
  1653 + {
  1654 + std::cout << whoami << ": " << description
  1655 + << ": not optimizing because image has other than"
  1656 + << " 8 bits per component" << std::endl;
  1657 + }
  1658 + return result;
  1659 + }
  1660 + // Files have been seen in the wild whose width and height are
  1661 + // floating point, which is goofy, but we can deal with it.
  1662 + JDIMENSION w = 0;
  1663 + if (w_obj.isInteger())
  1664 + {
  1665 + w = w_obj.getUIntValueAsUInt();
  1666 + }
  1667 + else
  1668 + {
  1669 + w = static_cast<JDIMENSION>(w_obj.getNumericValue());
  1670 + }
  1671 + JDIMENSION h = 0;
  1672 + if (h_obj.isInteger())
  1673 + {
  1674 + h = h_obj.getUIntValueAsUInt();
  1675 + }
  1676 + else
  1677 + {
  1678 + h = static_cast<JDIMENSION>(h_obj.getNumericValue());
  1679 + }
  1680 + std::string colorspace = (colorspace_obj.isName() ?
  1681 + colorspace_obj.getName() :
  1682 + std::string());
  1683 + int components = 0;
  1684 + J_COLOR_SPACE cs = JCS_UNKNOWN;
  1685 + if (colorspace == "/DeviceRGB")
  1686 + {
  1687 + components = 3;
  1688 + cs = JCS_RGB;
  1689 + }
  1690 + else if (colorspace == "/DeviceGray")
  1691 + {
  1692 + components = 1;
  1693 + cs = JCS_GRAYSCALE;
  1694 + }
  1695 + else if (colorspace == "/DeviceCMYK")
  1696 + {
  1697 + components = 4;
  1698 + cs = JCS_CMYK;
  1699 + }
  1700 + else
  1701 + {
  1702 + QTC::TC("qpdf", "qpdf image optimize colorspace");
  1703 + if (o.verbose && (! description.empty()))
  1704 + {
  1705 + std::cout << whoami << ": " << description
  1706 + << ": not optimizing because qpdf can't optimize"
  1707 + << " images with this colorspace" << std::endl;
  1708 + }
  1709 + return result;
  1710 + }
  1711 + if (((o.oi_min_width > 0) && (w <= o.oi_min_width)) ||
  1712 + ((o.oi_min_height > 0) && (h <= o.oi_min_height)) ||
  1713 + ((o.oi_min_area > 0) && ((w * h) <= o.oi_min_area)))
  1714 + {
  1715 + QTC::TC("qpdf", "qpdf image optimize too small");
  1716 + if (o.verbose && (! description.empty()))
  1717 + {
  1718 + std::cout << whoami << ": " << description
  1719 + << ": not optimizing because image"
  1720 + << " is smaller than requested minimum dimensions"
  1721 + << std::endl;
  1722 + }
  1723 + return result;
  1724 + }
  1725 +
  1726 + result = new Pl_DCT("jpg", next, w, h, components, cs);
  1727 + return result;
  1728 +}
  1729 +
  1730 +bool
  1731 +ImageOptimizer::evaluate(std::string const& description)
  1732 +{
  1733 + if (! image.pipeStreamData(0, 0, qpdf_dl_specialized, true))
  1734 + {
  1735 + QTC::TC("qpdf", "qpdf image optimize no pipeline");
  1736 + if (o.verbose)
  1737 + {
  1738 + std::cout << whoami << ": " << description
  1739 + << ": not optimizing because unable to decode data"
  1740 + << " or data already uses DCT"
  1741 + << std::endl;
  1742 + }
  1743 + return false;
  1744 + }
  1745 + Pl_Discard d;
  1746 + Pl_Count c("count", &d);
  1747 + PointerHolder<Pipeline> p = makePipeline(description, &c);
  1748 + if (p.getPointer() == 0)
  1749 + {
  1750 + // message issued by makePipeline
  1751 + return false;
  1752 + }
  1753 + if (! image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized))
  1754 + {
  1755 + return false;
  1756 + }
  1757 + long long orig_length = image.getDict().getKey("/Length").getIntValue();
  1758 + if (c.getCount() >= orig_length)
  1759 + {
  1760 + QTC::TC("qpdf", "qpdf image optimize no shrink");
  1761 + if (o.verbose)
  1762 + {
  1763 + std::cout << whoami << ": " << description
  1764 + << ": not optimizing because DCT compression does not"
  1765 + << " reduce image size" << std::endl;
  1766 + }
  1767 + return false;
  1768 + }
  1769 + if (o.verbose)
  1770 + {
  1771 + std::cout << whoami << ": " << description
  1772 + << ": optimizing image reduces size from "
  1773 + << orig_length << " to " << c.getCount()
  1774 + << std::endl;
  1775 + }
  1776 + return true;
  1777 +}
  1778 +
  1779 +void
  1780 +ImageOptimizer::provideStreamData(int, int, Pipeline* pipeline)
  1781 +{
  1782 + PointerHolder<Pipeline> p = makePipeline("", pipeline);
  1783 + if (p.getPointer() == 0)
  1784 + {
  1785 + // Should not be possible
  1786 + image.warnIfPossible("unable to create pipeline after previous"
  1787 + " success; image data will be lost");
  1788 + pipeline->finish();
  1789 + return;
  1790 + }
  1791 + image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized,
  1792 + false, false);
  1793 +}
  1794 +
  1795 +template <typename T>
  1796 +static PointerHolder<QPDF> do_process_once(
  1797 + void (QPDF::*fn)(T, char const*),
  1798 + T item, char const* password,
  1799 + QPDFJob& o, bool empty)
  1800 +{
  1801 + PointerHolder<QPDF> pdf = new QPDF;
  1802 + set_qpdf_options(*pdf, o);
  1803 + if (empty)
  1804 + {
  1805 + pdf->emptyPDF();
  1806 + }
  1807 + else
  1808 + {
  1809 + ((*pdf).*fn)(item, password);
  1810 + }
  1811 + return pdf;
  1812 +}
  1813 +
  1814 +template <typename T>
  1815 +static PointerHolder<QPDF> do_process(
  1816 + void (QPDF::*fn)(T, char const*),
  1817 + T item, char const* password,
  1818 + QPDFJob& o, bool empty)
  1819 +{
  1820 + // If a password has been specified but doesn't work, try other
  1821 + // passwords that are equivalent in different character encodings.
  1822 + // This makes it possible to open PDF files that were encrypted
  1823 + // using incorrect string encodings. For example, if someone used
  1824 + // a password encoded in PDF Doc encoding or Windows code page
  1825 + // 1252 for an AES-encrypted file or a UTF-8-encoded password on
  1826 + // an RC4-encrypted file, or if the password was properly encoded
  1827 + // by the password given here was incorrectly encoded, there's a
  1828 + // good chance we'd succeed here.
  1829 +
  1830 + std::string ptemp;
  1831 + if (password && (! o.password_is_hex_key))
  1832 + {
  1833 + if (o.password_mode == QPDFJob::pm_hex_bytes)
  1834 + {
  1835 + // Special case: handle --password-mode=hex-bytes for input
  1836 + // password as well as output password
  1837 + QTC::TC("qpdf", "qpdf input password hex-bytes");
  1838 + ptemp = QUtil::hex_decode(password);
  1839 + password = ptemp.c_str();
  1840 + }
  1841 + }
  1842 + if ((password == 0) || empty || o.password_is_hex_key ||
  1843 + o.suppress_password_recovery)
  1844 + {
  1845 + // There is no password, or we're not doing recovery, so just
  1846 + // do the normal processing with the supplied password.
  1847 + return do_process_once(fn, item, password, o, empty);
  1848 + }
  1849 +
  1850 + // Get a list of otherwise encoded strings. Keep in scope for this
  1851 + // method.
  1852 + std::vector<std::string> passwords_str =
  1853 + QUtil::possible_repaired_encodings(password);
  1854 + // Represent to char const*, as required by the QPDF class.
  1855 + std::vector<char const*> passwords;
  1856 + for (std::vector<std::string>::iterator iter = passwords_str.begin();
  1857 + iter != passwords_str.end(); ++iter)
  1858 + {
  1859 + passwords.push_back((*iter).c_str());
  1860 + }
  1861 + // We always try the supplied password first because it is the
  1862 + // first string returned by possible_repaired_encodings. If there
  1863 + // is more than one option, go ahead and put the supplied password
  1864 + // at the end so that it's that decoding attempt whose exception
  1865 + // is thrown.
  1866 + if (passwords.size() > 1)
  1867 + {
  1868 + passwords.push_back(password);
  1869 + }
  1870 +
  1871 + // Try each password. If one works, return the resulting object.
  1872 + // If they all fail, throw the exception thrown by the final
  1873 + // attempt, which, like the first attempt, will be with the
  1874 + // supplied password.
  1875 + bool warned = false;
  1876 + for (std::vector<char const*>::iterator iter = passwords.begin();
  1877 + iter != passwords.end(); ++iter)
  1878 + {
  1879 + try
  1880 + {
  1881 + return do_process_once(fn, item, *iter, o, empty);
  1882 + }
  1883 + catch (QPDFExc& e)
  1884 + {
  1885 + std::vector<char const*>::iterator next = iter;
  1886 + ++next;
  1887 + if (next == passwords.end())
  1888 + {
  1889 + throw e;
  1890 + }
  1891 + }
  1892 + if ((! warned) && o.verbose)
  1893 + {
  1894 + warned = true;
  1895 + std::cout << whoami << ": supplied password didn't work;"
  1896 + << " trying other passwords based on interpreting"
  1897 + << " password with different string encodings"
  1898 + << std::endl;
  1899 + }
  1900 + }
  1901 + // Should not be reachable
  1902 + throw std::logic_error("do_process returned");
  1903 +}
  1904 +
  1905 +PointerHolder<QPDF>
  1906 +QPDFJob::processFile(char const* filename, char const* password)
  1907 +{
  1908 + QPDFJob& o = *this; // QXXXQ
  1909 + return do_process(&QPDF::processFile, filename, password, o,
  1910 + strcmp(filename, "") == 0);
  1911 +}
  1912 +
  1913 +static PointerHolder<QPDF> process_input_source(
  1914 + PointerHolder<InputSource> is, char const* password, QPDFJob& o)
  1915 +{
  1916 + return do_process(&QPDF::processInputSource, is, password, o, false);
  1917 +}
  1918 +
  1919 +void
  1920 +QPDFJob::validateUnderOverlay(QPDF& pdf, QPDFJob::UnderOverlay* uo)
  1921 +{
  1922 + if (0 == uo->filename)
  1923 + {
  1924 + return;
  1925 + }
  1926 + QPDFPageDocumentHelper main_pdh(pdf);
  1927 + int main_npages = QIntC::to_int(main_pdh.getAllPages().size());
  1928 + uo->pdf = processFile(uo->filename, uo->password);
  1929 + QPDFPageDocumentHelper uo_pdh(*(uo->pdf));
  1930 + int uo_npages = QIntC::to_int(uo_pdh.getAllPages().size());
  1931 + try
  1932 + {
  1933 + uo->to_pagenos = QUtil::parse_numrange(uo->to_nr, main_npages);
  1934 + }
  1935 + catch (std::runtime_error& e)
  1936 + {
  1937 + throw std::runtime_error(
  1938 + "parsing numeric range for " + uo->which +
  1939 + " \"to\" pages: " + e.what());
  1940 + }
  1941 + try
  1942 + {
  1943 + if (0 == strlen(uo->from_nr))
  1944 + {
  1945 + QTC::TC("qpdf", "qpdf from_nr from repeat_nr");
  1946 + uo->from_nr = uo->repeat_nr;
  1947 + }
  1948 + uo->from_pagenos = QUtil::parse_numrange(uo->from_nr, uo_npages);
  1949 + if (strlen(uo->repeat_nr))
  1950 + {
  1951 + uo->repeat_pagenos =
  1952 + QUtil::parse_numrange(uo->repeat_nr, uo_npages);
  1953 + }
  1954 + }
  1955 + catch (std::runtime_error& e)
  1956 + {
  1957 + throw std::runtime_error(
  1958 + "parsing numeric range for " + uo->which + " file " +
  1959 + uo->filename + ": " + e.what());
  1960 + }
  1961 +}
  1962 +
  1963 +static void get_uo_pagenos(QPDFJob::UnderOverlay& uo,
  1964 + std::map<int, std::vector<int> >& pagenos)
  1965 +{
  1966 + size_t idx = 0;
  1967 + size_t from_size = uo.from_pagenos.size();
  1968 + size_t repeat_size = uo.repeat_pagenos.size();
  1969 + for (std::vector<int>::iterator iter = uo.to_pagenos.begin();
  1970 + iter != uo.to_pagenos.end(); ++iter, ++idx)
  1971 + {
  1972 + if (idx < from_size)
  1973 + {
  1974 + pagenos[*iter].push_back(uo.from_pagenos.at(idx));
  1975 + }
  1976 + else if (repeat_size)
  1977 + {
  1978 + pagenos[*iter].push_back(
  1979 + uo.repeat_pagenos.at((idx - from_size) % repeat_size));
  1980 + }
  1981 + }
  1982 +}
  1983 +
  1984 +static QPDFAcroFormDocumentHelper* get_afdh_for_qpdf(
  1985 + std::map<unsigned long long,
  1986 + PointerHolder<QPDFAcroFormDocumentHelper>>& afdh_map,
  1987 + QPDF* q)
  1988 +{
  1989 + auto uid = q->getUniqueId();
  1990 + if (! afdh_map.count(uid))
  1991 + {
  1992 + afdh_map[uid] = new QPDFAcroFormDocumentHelper(*q);
  1993 + }
  1994 + return afdh_map[uid].getPointer();
  1995 +}
  1996 +
  1997 +static void do_under_overlay_for_page(
  1998 + QPDF& pdf,
  1999 + QPDFJob& o,
  2000 + QPDFJob::UnderOverlay& uo,
  2001 + std::map<int, std::vector<int> >& pagenos,
  2002 + size_t page_idx,
  2003 + std::map<int, QPDFObjectHandle>& fo,
  2004 + std::vector<QPDFPageObjectHelper>& pages,
  2005 + QPDFPageObjectHelper& dest_page,
  2006 + bool before)
  2007 +{
  2008 + int pageno = 1 + QIntC::to_int(page_idx);
  2009 + if (! pagenos.count(pageno))
  2010 + {
  2011 + return;
  2012 + }
  2013 +
  2014 + std::map<unsigned long long,
  2015 + PointerHolder<QPDFAcroFormDocumentHelper>> afdh;
  2016 + auto make_afdh = [&](QPDFPageObjectHelper& ph) {
  2017 + QPDF* q = ph.getObjectHandle().getOwningQPDF();
  2018 + return get_afdh_for_qpdf(afdh, q);
  2019 + };
  2020 + auto dest_afdh = make_afdh(dest_page);
  2021 +
  2022 + std::string content;
  2023 + int min_suffix = 1;
  2024 + QPDFObjectHandle resources = dest_page.getAttribute("/Resources", true);
  2025 + if (! resources.isDictionary())
  2026 + {
  2027 + QTC::TC("qpdf", "qpdf overlay page with no resources");
  2028 + resources = QPDFObjectHandle::newDictionary();
  2029 + dest_page.getObjectHandle().replaceKey("/Resources", resources);
  2030 + }
  2031 + for (std::vector<int>::iterator iter = pagenos[pageno].begin();
  2032 + iter != pagenos[pageno].end(); ++iter)
  2033 + {
  2034 + int from_pageno = *iter;
  2035 + if (o.verbose)
  2036 + {
  2037 + std::cout << " " << uo.which << " " << from_pageno << std::endl;
  2038 + }
  2039 + auto from_page = pages.at(QIntC::to_size(from_pageno - 1));
  2040 + if (0 == fo.count(from_pageno))
  2041 + {
  2042 + fo[from_pageno] =
  2043 + pdf.copyForeignObject(
  2044 + from_page.getFormXObjectForPage());
  2045 + }
  2046 +
  2047 + // If the same page is overlaid or underlaid multiple times,
  2048 + // we'll generate multiple names for it, but that's harmless
  2049 + // and also a pretty goofy case that's not worth coding
  2050 + // around.
  2051 + std::string name = resources.getUniqueResourceName("/Fx", min_suffix);
  2052 + QPDFMatrix cm;
  2053 + std::string new_content = dest_page.placeFormXObject(
  2054 + fo[from_pageno], name,
  2055 + dest_page.getTrimBox().getArrayAsRectangle(), cm);
  2056 + dest_page.copyAnnotations(
  2057 + from_page, cm, dest_afdh, make_afdh(from_page));
  2058 + if (! new_content.empty())
  2059 + {
  2060 + resources.mergeResources(
  2061 + QPDFObjectHandle::parse("<< /XObject << >> >>"));
  2062 + auto xobject = resources.getKey("/XObject");
  2063 + if (xobject.isDictionary())
  2064 + {
  2065 + xobject.replaceKey(name, fo[from_pageno]);
  2066 + }
  2067 + ++min_suffix;
  2068 + content += new_content;
  2069 + }
  2070 + }
  2071 + if (! content.empty())
  2072 + {
  2073 + if (before)
  2074 + {
  2075 + dest_page.addPageContents(
  2076 + QPDFObjectHandle::newStream(&pdf, content), true);
  2077 + }
  2078 + else
  2079 + {
  2080 + dest_page.addPageContents(
  2081 + QPDFObjectHandle::newStream(&pdf, "q\n"), true);
  2082 + dest_page.addPageContents(
  2083 + QPDFObjectHandle::newStream(&pdf, "\nQ\n" + content), false);
  2084 + }
  2085 + }
  2086 +}
  2087 +
  2088 +void
  2089 +QPDFJob::handleUnderOverlay(QPDF& pdf)
  2090 +{
  2091 + QPDFJob& o = *this; // QXXXQ
  2092 + validateUnderOverlay(pdf, &o.underlay);
  2093 + validateUnderOverlay(pdf, &o.overlay);
  2094 + if ((0 == o.underlay.pdf.getPointer()) &&
  2095 + (0 == o.overlay.pdf.getPointer()))
  2096 + {
  2097 + return;
  2098 + }
  2099 + std::map<int, std::vector<int> > underlay_pagenos;
  2100 + get_uo_pagenos(o.underlay, underlay_pagenos);
  2101 + std::map<int, std::vector<int> > overlay_pagenos;
  2102 + get_uo_pagenos(o.overlay, overlay_pagenos);
  2103 + std::map<int, QPDFObjectHandle> underlay_fo;
  2104 + std::map<int, QPDFObjectHandle> overlay_fo;
  2105 + std::vector<QPDFPageObjectHelper> upages;
  2106 + if (o.underlay.pdf.getPointer())
  2107 + {
  2108 + upages = QPDFPageDocumentHelper(*(o.underlay.pdf)).getAllPages();
  2109 + }
  2110 + std::vector<QPDFPageObjectHelper> opages;
  2111 + if (o.overlay.pdf.getPointer())
  2112 + {
  2113 + opages = QPDFPageDocumentHelper(*(o.overlay.pdf)).getAllPages();
  2114 + }
  2115 +
  2116 + QPDFPageDocumentHelper main_pdh(pdf);
  2117 + std::vector<QPDFPageObjectHelper> main_pages = main_pdh.getAllPages();
  2118 + size_t main_npages = main_pages.size();
  2119 + if (o.verbose)
  2120 + {
  2121 + std::cout << whoami << ": processing underlay/overlay" << std::endl;
  2122 + }
  2123 + for (size_t i = 0; i < main_npages; ++i)
  2124 + {
  2125 + if (o.verbose)
  2126 + {
  2127 + std::cout << " page " << 1+i << std::endl;
  2128 + }
  2129 + do_under_overlay_for_page(pdf, o, o.underlay, underlay_pagenos, i,
  2130 + underlay_fo, upages, main_pages.at(i),
  2131 + true);
  2132 + do_under_overlay_for_page(pdf, o, o.overlay, overlay_pagenos, i,
  2133 + overlay_fo, opages, main_pages.at(i),
  2134 + false);
  2135 + }
  2136 +}
  2137 +
  2138 +static void maybe_set_pagemode(QPDF& pdf, std::string const& pagemode)
  2139 +{
  2140 + auto root = pdf.getRoot();
  2141 + if (root.getKey("/PageMode").isNull())
  2142 + {
  2143 + root.replaceKey("/PageMode", QPDFObjectHandle::newName(pagemode));
  2144 + }
  2145 +}
  2146 +
  2147 +void
  2148 +QPDFJob::addAttachments(QPDF& pdf)
  2149 +{
  2150 + QPDFJob& o = *this; // QXXXQ
  2151 + maybe_set_pagemode(pdf, "/UseAttachments");
  2152 + QPDFEmbeddedFileDocumentHelper efdh(pdf);
  2153 + std::vector<std::string> duplicated_keys;
  2154 + for (auto const& to_add: o.attachments_to_add)
  2155 + {
  2156 + if ((! to_add.replace) && efdh.getEmbeddedFile(to_add.key))
  2157 + {
  2158 + duplicated_keys.push_back(to_add.key);
  2159 + continue;
  2160 + }
  2161 +
  2162 + auto fs = QPDFFileSpecObjectHelper::createFileSpec(
  2163 + pdf, to_add.filename, to_add.path);
  2164 + if (! to_add.description.empty())
  2165 + {
  2166 + fs.setDescription(to_add.description);
  2167 + }
  2168 + auto efs = QPDFEFStreamObjectHelper(fs.getEmbeddedFileStream());
  2169 + efs.setCreationDate(to_add.creationdate)
  2170 + .setModDate(to_add.moddate);
  2171 + if (! to_add.mimetype.empty())
  2172 + {
  2173 + efs.setSubtype(to_add.mimetype);
  2174 + }
  2175 +
  2176 + efdh.replaceEmbeddedFile(to_add.key, fs);
  2177 + if (o.verbose)
  2178 + {
  2179 + std::cout << whoami << ": attached " << to_add.path
  2180 + << " as " << to_add.filename
  2181 + << " with key " << to_add.key << std::endl;
  2182 + }
  2183 + }
  2184 +
  2185 + if (! duplicated_keys.empty())
  2186 + {
  2187 + std::string message;
  2188 + for (auto const& k: duplicated_keys)
  2189 + {
  2190 + if (! message.empty())
  2191 + {
  2192 + message += ", ";
  2193 + }
  2194 + message += k;
  2195 + }
  2196 + message = pdf.getFilename() +
  2197 + " already has attachments with the following keys: " +
  2198 + message +
  2199 + "; use --replace to replace or --key to specify a different key";
  2200 + throw std::runtime_error(message);
  2201 + }
  2202 +}
  2203 +
  2204 +void
  2205 +QPDFJob::copyAttachments(QPDF& pdf)
  2206 +{
  2207 + QPDFJob& o = *this; // QXXXQ
  2208 + maybe_set_pagemode(pdf, "/UseAttachments");
  2209 + QPDFEmbeddedFileDocumentHelper efdh(pdf);
  2210 + std::vector<std::string> duplicates;
  2211 + for (auto const& to_copy: o.attachments_to_copy)
  2212 + {
  2213 + if (o.verbose)
  2214 + {
  2215 + std::cout << whoami << ": copying attachments from "
  2216 + << to_copy.path << std::endl;
  2217 + }
  2218 + auto other = processFile(
  2219 + to_copy.path.c_str(), to_copy.password.c_str());
  2220 + QPDFEmbeddedFileDocumentHelper other_efdh(*other);
  2221 + auto other_attachments = other_efdh.getEmbeddedFiles();
  2222 + for (auto const& iter: other_attachments)
  2223 + {
  2224 + std::string new_key = to_copy.prefix + iter.first;
  2225 + if (efdh.getEmbeddedFile(new_key))
  2226 + {
  2227 + duplicates.push_back(
  2228 + "file: " + to_copy.path + ", key: " + new_key);
  2229 + }
  2230 + else
  2231 + {
  2232 + auto new_fs_oh = pdf.copyForeignObject(
  2233 + iter.second->getObjectHandle());
  2234 + efdh.replaceEmbeddedFile(
  2235 + new_key, QPDFFileSpecObjectHelper(new_fs_oh));
  2236 + if (o.verbose)
  2237 + {
  2238 + std::cout << " " << iter.first << " -> " << new_key
  2239 + << std::endl;
  2240 + }
  2241 + }
  2242 + }
  2243 +
  2244 + if (other->anyWarnings())
  2245 + {
  2246 + this->m->warnings = true;
  2247 + }
  2248 + }
  2249 +
  2250 + if (! duplicates.empty())
  2251 + {
  2252 + std::string message;
  2253 + for (auto const& i: duplicates)
  2254 + {
  2255 + if (! message.empty())
  2256 + {
  2257 + message += "; ";
  2258 + }
  2259 + message += i;
  2260 + }
  2261 + message = pdf.getFilename() +
  2262 + " already has attachments with keys that conflict with"
  2263 + " attachments from other files: " + message +
  2264 + ". Use --prefix with --copy-attachments-from"
  2265 + " or manually copy individual attachments.";
  2266 + throw std::runtime_error(message);
  2267 + }
  2268 +}
  2269 +
  2270 +void
  2271 +QPDFJob::handleTransformations(QPDF& pdf)
  2272 +{
  2273 + QPDFJob& o = *this; // QXXXQ
  2274 + QPDFPageDocumentHelper dh(pdf);
  2275 + PointerHolder<QPDFAcroFormDocumentHelper> afdh;
  2276 + auto make_afdh = [&]() {
  2277 + if (! afdh.getPointer())
  2278 + {
  2279 + afdh = new QPDFAcroFormDocumentHelper(pdf);
  2280 + }
  2281 + };
  2282 + if (o.externalize_inline_images)
  2283 + {
  2284 + std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
  2285 + for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
  2286 + iter != pages.end(); ++iter)
  2287 + {
  2288 + QPDFPageObjectHelper& ph(*iter);
  2289 + ph.externalizeInlineImages(o.ii_min_bytes);
  2290 + }
  2291 + }
  2292 + if (o.optimize_images)
  2293 + {
  2294 + int pageno = 0;
  2295 + std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
  2296 + for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
  2297 + iter != pages.end(); ++iter)
  2298 + {
  2299 + ++pageno;
  2300 + QPDFPageObjectHelper& ph(*iter);
  2301 + QPDFObjectHandle page = ph.getObjectHandle();
  2302 + std::map<std::string, QPDFObjectHandle> images = ph.getImages();
  2303 + for (auto& iter2: images)
  2304 + {
  2305 + std::string name = iter2.first;
  2306 + QPDFObjectHandle& image = iter2.second;
  2307 + ImageOptimizer* io = new ImageOptimizer(o, image);
  2308 + PointerHolder<QPDFObjectHandle::StreamDataProvider> sdp(io);
  2309 + if (io->evaluate("image " + name + " on page " +
  2310 + QUtil::int_to_string(pageno)))
  2311 + {
  2312 + QPDFObjectHandle new_image =
  2313 + QPDFObjectHandle::newStream(&pdf);
  2314 + new_image.replaceDict(image.getDict().shallowCopy());
  2315 + new_image.replaceStreamData(
  2316 + sdp,
  2317 + QPDFObjectHandle::newName("/DCTDecode"),
  2318 + QPDFObjectHandle::newNull());
  2319 + ph.getAttribute("/Resources", true).
  2320 + getKey("/XObject").replaceKey(
  2321 + name, new_image);
  2322 + }
  2323 + }
  2324 + }
  2325 + }
  2326 + if (o.generate_appearances)
  2327 + {
  2328 + make_afdh();
  2329 + afdh->generateAppearancesIfNeeded();
  2330 + }
  2331 + if (o.flatten_annotations)
  2332 + {
  2333 + dh.flattenAnnotations(o.flatten_annotations_required,
  2334 + o.flatten_annotations_forbidden);
  2335 + }
  2336 + if (o.coalesce_contents)
  2337 + {
  2338 + std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
  2339 + for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
  2340 + iter != pages.end(); ++iter)
  2341 + {
  2342 + (*iter).coalesceContentStreams();
  2343 + }
  2344 + }
  2345 + if (o.flatten_rotation)
  2346 + {
  2347 + make_afdh();
  2348 + for (auto& page: dh.getAllPages())
  2349 + {
  2350 + page.flattenRotation(afdh.getPointer());
  2351 + }
  2352 + }
  2353 + if (o.remove_page_labels)
  2354 + {
  2355 + pdf.getRoot().removeKey("/PageLabels");
  2356 + }
  2357 + if (! o.attachments_to_remove.empty())
  2358 + {
  2359 + QPDFEmbeddedFileDocumentHelper efdh(pdf);
  2360 + for (auto const& key: o.attachments_to_remove)
  2361 + {
  2362 + if (efdh.removeEmbeddedFile(key))
  2363 + {
  2364 + if (o.verbose)
  2365 + {
  2366 + std::cout << whoami <<
  2367 + ": removed attachment " << key << std::endl;
  2368 + }
  2369 + }
  2370 + else
  2371 + {
  2372 + throw std::runtime_error("attachment " + key + " not found");
  2373 + }
  2374 + }
  2375 + }
  2376 + if (! o.attachments_to_add.empty())
  2377 + {
  2378 + addAttachments(pdf);
  2379 + }
  2380 + if (! o.attachments_to_copy.empty())
  2381 + {
  2382 + copyAttachments(pdf);
  2383 + }
  2384 +}
  2385 +
  2386 +static bool should_remove_unreferenced_resources(QPDF& pdf, QPDFJob& o)
  2387 +{
  2388 + if (o.remove_unreferenced_page_resources == QPDFJob::re_no)
  2389 + {
  2390 + return false;
  2391 + }
  2392 + else if (o.remove_unreferenced_page_resources == QPDFJob::re_yes)
  2393 + {
  2394 + return true;
  2395 + }
  2396 +
  2397 + // Unreferenced resources are common in files where resources
  2398 + // dictionaries are shared across pages. As a heuristic, we look
  2399 + // in the file for shared resources dictionaries or shared XObject
  2400 + // subkeys of resources dictionaries either on pages or on form
  2401 + // XObjects in pages. If we find any, then there is a higher
  2402 + // likelihood that the expensive process of finding unreferenced
  2403 + // resources is worth it.
  2404 +
  2405 + // Return true as soon as we find any shared resources.
  2406 +
  2407 + std::set<QPDFObjGen> resources_seen; // shared resources detection
  2408 + std::set<QPDFObjGen> nodes_seen; // loop detection
  2409 +
  2410 + if (o.verbose)
  2411 + {
  2412 + std::cout << whoami << ": " << pdf.getFilename()
  2413 + << ": checking for shared resources" << std::endl;
  2414 + }
  2415 +
  2416 + std::list<QPDFObjectHandle> queue;
  2417 + queue.push_back(pdf.getRoot().getKey("/Pages"));
  2418 + while (! queue.empty())
  2419 + {
  2420 + QPDFObjectHandle node = *queue.begin();
  2421 + queue.pop_front();
  2422 + QPDFObjGen og = node.getObjGen();
  2423 + if (nodes_seen.count(og))
  2424 + {
  2425 + continue;
  2426 + }
  2427 + nodes_seen.insert(og);
  2428 + QPDFObjectHandle dict = node.isStream() ? node.getDict() : node;
  2429 + QPDFObjectHandle kids = dict.getKey("/Kids");
  2430 + if (kids.isArray())
  2431 + {
  2432 + // This is a non-leaf node.
  2433 + if (dict.hasKey("/Resources"))
  2434 + {
  2435 + QTC::TC("qpdf", "qpdf found resources in non-leaf");
  2436 + if (o.verbose)
  2437 + {
  2438 + std::cout << " found resources in non-leaf page node "
  2439 + << og.getObj() << " " << og.getGen()
  2440 + << std::endl;
  2441 + }
  2442 + return true;
  2443 + }
  2444 + int n = kids.getArrayNItems();
  2445 + for (int i = 0; i < n; ++i)
  2446 + {
  2447 + queue.push_back(kids.getArrayItem(i));
  2448 + }
  2449 + }
  2450 + else
  2451 + {
  2452 + // This is a leaf node or a form XObject.
  2453 + QPDFObjectHandle resources = dict.getKey("/Resources");
  2454 + if (resources.isIndirect())
  2455 + {
  2456 + QPDFObjGen resources_og = resources.getObjGen();
  2457 + if (resources_seen.count(resources_og))
  2458 + {
  2459 + QTC::TC("qpdf", "qpdf found shared resources in leaf");
  2460 + if (o.verbose)
  2461 + {
  2462 + std::cout << " found shared resources in leaf node "
  2463 + << og.getObj() << " " << og.getGen()
  2464 + << ": "
  2465 + << resources_og.getObj() << " "
  2466 + << resources_og.getGen()
  2467 + << std::endl;
  2468 + }
  2469 + return true;
  2470 + }
  2471 + resources_seen.insert(resources_og);
  2472 + }
  2473 + QPDFObjectHandle xobject = (resources.isDictionary() ?
  2474 + resources.getKey("/XObject") :
  2475 + QPDFObjectHandle::newNull());
  2476 + if (xobject.isIndirect())
  2477 + {
  2478 + QPDFObjGen xobject_og = xobject.getObjGen();
  2479 + if (resources_seen.count(xobject_og))
  2480 + {
  2481 + QTC::TC("qpdf", "qpdf found shared xobject in leaf");
  2482 + if (o.verbose)
  2483 + {
  2484 + std::cout << " found shared xobject in leaf node "
  2485 + << og.getObj() << " " << og.getGen()
  2486 + << ": "
  2487 + << xobject_og.getObj() << " "
  2488 + << xobject_og.getGen()
  2489 + << std::endl;
  2490 + }
  2491 + return true;
  2492 + }
  2493 + resources_seen.insert(xobject_og);
  2494 + }
  2495 + if (xobject.isDictionary())
  2496 + {
  2497 + for (auto const& k: xobject.getKeys())
  2498 + {
  2499 + QPDFObjectHandle xobj = xobject.getKey(k);
  2500 + if (xobj.isStream() &&
  2501 + xobj.getDict().getKey("/Type").isName() &&
  2502 + ("/XObject" ==
  2503 + xobj.getDict().getKey("/Type").getName()) &&
  2504 + xobj.getDict().getKey("/Subtype").isName() &&
  2505 + ("/Form" ==
  2506 + xobj.getDict().getKey("/Subtype").getName()))
  2507 + {
  2508 + queue.push_back(xobj);
  2509 + }
  2510 + }
  2511 + }
  2512 + }
  2513 + }
  2514 +
  2515 + if (o.verbose)
  2516 + {
  2517 + std::cout << whoami << ": no shared resources found" << std::endl;
  2518 + }
  2519 + return false;
  2520 +}
  2521 +
  2522 +static QPDFObjectHandle added_page(QPDF& pdf, QPDFObjectHandle page)
  2523 +{
  2524 + QPDFObjectHandle result = page;
  2525 + if (page.getOwningQPDF() != &pdf)
  2526 + {
  2527 + // Calling copyForeignObject on an object we already copied
  2528 + // will give us the already existing copy.
  2529 + result = pdf.copyForeignObject(page);
  2530 + }
  2531 + return result;
  2532 +}
  2533 +
  2534 +static QPDFObjectHandle added_page(QPDF& pdf, QPDFPageObjectHelper page)
  2535 +{
  2536 + return added_page(pdf, page.getObjectHandle());
  2537 +}
  2538 +
  2539 +static void handle_page_specs(
  2540 + QPDF& pdf, QPDFJob& o, bool& warnings,
  2541 + std::vector<PointerHolder<QPDF>>& page_heap)
  2542 +{
  2543 + // Parse all page specifications and translate them into lists of
  2544 + // actual pages.
  2545 +
  2546 + // Handle "." as a shortcut for the input file
  2547 + for (std::vector<QPDFJob::PageSpec>::iterator iter = o.page_specs.begin();
  2548 + iter != o.page_specs.end(); ++iter)
  2549 + {
  2550 + QPDFJob::PageSpec& page_spec = *iter;
  2551 + if (page_spec.filename == ".")
  2552 + {
  2553 + page_spec.filename = o.infilename;
  2554 + }
  2555 + }
  2556 +
  2557 + if (! o.keep_files_open_set)
  2558 + {
  2559 + // Count the number of distinct files to determine whether we
  2560 + // should keep files open or not. Rather than trying to code
  2561 + // some portable heuristic based on OS limits, just hard-code
  2562 + // this at a given number and allow users to override.
  2563 + std::set<std::string> filenames;
  2564 + for (std::vector<QPDFJob::PageSpec>::iterator iter = o.page_specs.begin();
  2565 + iter != o.page_specs.end(); ++iter)
  2566 + {
  2567 + QPDFJob::PageSpec& page_spec = *iter;
  2568 + filenames.insert(page_spec.filename);
  2569 + }
  2570 + if (filenames.size() > o.keep_files_open_threshold)
  2571 + {
  2572 + QTC::TC("qpdf", "qpdf disable keep files open");
  2573 + if (o.verbose)
  2574 + {
  2575 + std::cout << whoami << ": selecting --keep-open-files=n"
  2576 + << std::endl;
  2577 + }
  2578 + o.keep_files_open = false;
  2579 + }
  2580 + else
  2581 + {
  2582 + if (o.verbose)
  2583 + {
  2584 + std::cout << whoami << ": selecting --keep-open-files=y"
  2585 + << std::endl;
  2586 + }
  2587 + o.keep_files_open = true;
  2588 + QTC::TC("qpdf", "qpdf don't disable keep files open");
  2589 + }
  2590 + }
  2591 +
  2592 + // Create a QPDF object for each file that we may take pages from.
  2593 + std::map<std::string, QPDF*> page_spec_qpdfs;
  2594 + std::map<std::string, ClosedFileInputSource*> page_spec_cfis;
  2595 + page_spec_qpdfs[o.infilename] = &pdf;
  2596 + std::vector<QPDFPageData> parsed_specs;
  2597 + std::map<unsigned long long, std::set<QPDFObjGen> > copied_pages;
  2598 + for (std::vector<QPDFJob::PageSpec>::iterator iter = o.page_specs.begin();
  2599 + iter != o.page_specs.end(); ++iter)
  2600 + {
  2601 + QPDFJob::PageSpec& page_spec = *iter;
  2602 + if (page_spec_qpdfs.count(page_spec.filename) == 0)
  2603 + {
  2604 + // Open the PDF file and store the QPDF object. Throw a
  2605 + // PointerHolder to the qpdf into a heap so that it
  2606 + // survives through copying to the output but gets cleaned up
  2607 + // automatically at the end. Do not canonicalize the file
  2608 + // name. Using two different paths to refer to the same
  2609 + // file is a document workaround for duplicating a page.
  2610 + // If you are using this an example of how to do this with
  2611 + // the API, you can just create two different QPDF objects
  2612 + // to the same underlying file with the same path to
  2613 + // achieve the same affect.
  2614 + char const* password = page_spec.password;
  2615 + if (o.encryption_file && (password == 0) &&
  2616 + (page_spec.filename == o.encryption_file))
  2617 + {
  2618 + QTC::TC("qpdf", "qpdf pages encryption password");
  2619 + password = o.encryption_file_password;
  2620 + }
  2621 + if (o.verbose)
  2622 + {
  2623 + std::cout << whoami << ": processing "
  2624 + << page_spec.filename << std::endl;
  2625 + }
  2626 + PointerHolder<InputSource> is;
  2627 + ClosedFileInputSource* cis = 0;
  2628 + if (! o.keep_files_open)
  2629 + {
  2630 + QTC::TC("qpdf", "qpdf keep files open n");
  2631 + cis = new ClosedFileInputSource(page_spec.filename.c_str());
  2632 + is = cis;
  2633 + cis->stayOpen(true);
  2634 + }
  2635 + else
  2636 + {
  2637 + QTC::TC("qpdf", "qpdf keep files open y");
  2638 + FileInputSource* fis = new FileInputSource();
  2639 + is = fis;
  2640 + fis->setFilename(page_spec.filename.c_str());
  2641 + }
  2642 + PointerHolder<QPDF> qpdf_ph = process_input_source(is, password, o);
  2643 + page_heap.push_back(qpdf_ph);
  2644 + page_spec_qpdfs[page_spec.filename] = qpdf_ph.getPointer();
  2645 + if (cis)
  2646 + {
  2647 + cis->stayOpen(false);
  2648 + page_spec_cfis[page_spec.filename] = cis;
  2649 + }
  2650 + }
  2651 +
  2652 + // Read original pages from the PDF, and parse the page range
  2653 + // associated with this occurrence of the file.
  2654 + parsed_specs.push_back(
  2655 + QPDFPageData(page_spec.filename,
  2656 + page_spec_qpdfs[page_spec.filename],
  2657 + page_spec.range));
  2658 + }
  2659 +
  2660 + std::map<unsigned long long, bool> remove_unreferenced;
  2661 + if (o.remove_unreferenced_page_resources != QPDFJob::re_no)
  2662 + {
  2663 + for (std::map<std::string, QPDF*>::iterator iter =
  2664 + page_spec_qpdfs.begin();
  2665 + iter != page_spec_qpdfs.end(); ++iter)
  2666 + {
  2667 + std::string const& filename = (*iter).first;
  2668 + ClosedFileInputSource* cis = 0;
  2669 + if (page_spec_cfis.count(filename))
  2670 + {
  2671 + cis = page_spec_cfis[filename];
  2672 + cis->stayOpen(true);
  2673 + }
  2674 + QPDF& other(*((*iter).second));
  2675 + auto other_uuid = other.getUniqueId();
  2676 + if (remove_unreferenced.count(other_uuid) == 0)
  2677 + {
  2678 + remove_unreferenced[other_uuid] =
  2679 + should_remove_unreferenced_resources(other, o);
  2680 + }
  2681 + if (cis)
  2682 + {
  2683 + cis->stayOpen(false);
  2684 + }
  2685 + }
  2686 + }
  2687 +
  2688 + // Clear all pages out of the primary QPDF's pages tree but leave
  2689 + // the objects in place in the file so they can be re-added
  2690 + // without changing their object numbers. This enables other
  2691 + // things in the original file, such as outlines, to continue to
  2692 + // work.
  2693 + if (o.verbose)
  2694 + {
  2695 + std::cout << whoami
  2696 + << ": removing unreferenced pages from primary input"
  2697 + << std::endl;
  2698 + }
  2699 + QPDFPageDocumentHelper dh(pdf);
  2700 + std::vector<QPDFPageObjectHelper> orig_pages = dh.getAllPages();
  2701 + for (std::vector<QPDFPageObjectHelper>::iterator iter =
  2702 + orig_pages.begin();
  2703 + iter != orig_pages.end(); ++iter)
  2704 + {
  2705 + dh.removePage(*iter);
  2706 + }
  2707 +
  2708 + if (o.collate && (parsed_specs.size() > 1))
  2709 + {
  2710 + // Collate the pages by selecting one page from each spec in
  2711 + // order. When a spec runs out of pages, stop selecting from
  2712 + // it.
  2713 + std::vector<QPDFPageData> new_parsed_specs;
  2714 + size_t nspecs = parsed_specs.size();
  2715 + size_t cur_page = 0;
  2716 + bool got_pages = true;
  2717 + while (got_pages)
  2718 + {
  2719 + got_pages = false;
  2720 + for (size_t i = 0; i < nspecs; ++i)
  2721 + {
  2722 + QPDFPageData& page_data = parsed_specs.at(i);
  2723 + for (size_t j = 0; j < o.collate; ++j)
  2724 + {
  2725 + if (cur_page + j < page_data.selected_pages.size())
  2726 + {
  2727 + got_pages = true;
  2728 + new_parsed_specs.push_back(
  2729 + QPDFPageData(
  2730 + page_data,
  2731 + page_data.selected_pages.at(cur_page + j)));
  2732 + }
  2733 + }
  2734 + }
  2735 + cur_page += o.collate;
  2736 + }
  2737 + parsed_specs = new_parsed_specs;
  2738 + }
  2739 +
  2740 + // Add all the pages from all the files in the order specified.
  2741 + // Keep track of any pages from the original file that we are
  2742 + // selecting.
  2743 + std::set<int> selected_from_orig;
  2744 + std::vector<QPDFObjectHandle> new_labels;
  2745 + bool any_page_labels = false;
  2746 + int out_pageno = 0;
  2747 + std::map<unsigned long long,
  2748 + PointerHolder<QPDFAcroFormDocumentHelper>> afdh_map;
  2749 + auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf);
  2750 + std::set<QPDFObjGen> referenced_fields;
  2751 + for (std::vector<QPDFPageData>::iterator iter =
  2752 + parsed_specs.begin();
  2753 + iter != parsed_specs.end(); ++iter)
  2754 + {
  2755 + QPDFPageData& page_data = *iter;
  2756 + ClosedFileInputSource* cis = 0;
  2757 + if (page_spec_cfis.count(page_data.filename))
  2758 + {
  2759 + cis = page_spec_cfis[page_data.filename];
  2760 + cis->stayOpen(true);
  2761 + }
  2762 + QPDFPageLabelDocumentHelper pldh(*page_data.qpdf);
  2763 + auto other_afdh = get_afdh_for_qpdf(afdh_map, page_data.qpdf);
  2764 + if (pldh.hasPageLabels())
  2765 + {
  2766 + any_page_labels = true;
  2767 + }
  2768 + if (o.verbose)
  2769 + {
  2770 + std::cout << whoami << ": adding pages from "
  2771 + << page_data.filename << std::endl;
  2772 + }
  2773 + for (std::vector<int>::iterator pageno_iter =
  2774 + page_data.selected_pages.begin();
  2775 + pageno_iter != page_data.selected_pages.end();
  2776 + ++pageno_iter, ++out_pageno)
  2777 + {
  2778 + // Pages are specified from 1 but numbered from 0 in the
  2779 + // vector
  2780 + int pageno = *pageno_iter - 1;
  2781 + pldh.getLabelsForPageRange(pageno, pageno, out_pageno,
  2782 + new_labels);
  2783 + QPDFPageObjectHelper to_copy =
  2784 + page_data.orig_pages.at(QIntC::to_size(pageno));
  2785 + QPDFObjGen to_copy_og = to_copy.getObjectHandle().getObjGen();
  2786 + unsigned long long from_uuid = page_data.qpdf->getUniqueId();
  2787 + if (copied_pages[from_uuid].count(to_copy_og))
  2788 + {
  2789 + QTC::TC("qpdf", "qpdf copy same page more than once",
  2790 + (page_data.qpdf == &pdf) ? 0 : 1);
  2791 + to_copy = to_copy.shallowCopyPage();
  2792 + }
  2793 + else
  2794 + {
  2795 + copied_pages[from_uuid].insert(to_copy_og);
  2796 + if (remove_unreferenced[from_uuid])
  2797 + {
  2798 + to_copy.removeUnreferencedResources();
  2799 + }
  2800 + }
  2801 + dh.addPage(to_copy, false);
  2802 + bool first_copy_from_orig = false;
  2803 + bool this_file = (page_data.qpdf == &pdf);
  2804 + if (this_file)
  2805 + {
  2806 + // This is a page from the original file. Keep track
  2807 + // of the fact that we are using it.
  2808 + first_copy_from_orig = (selected_from_orig.count(pageno) == 0);
  2809 + selected_from_orig.insert(pageno);
  2810 + }
  2811 + auto new_page = added_page(pdf, to_copy);
  2812 + // Try to avoid gratuitously renaming fields. In the case
  2813 + // of where we're just extracting a bunch of pages from
  2814 + // the original file and not copying any page more than
  2815 + // once, there's no reason to do anything with the fields.
  2816 + // Since we don't remove fields from the original file
  2817 + // until all copy operations are completed, any foreign
  2818 + // pages that conflict with original pages will be
  2819 + // adjusted. If we copy any page from the original file
  2820 + // more than once, that page would be in conflict with the
  2821 + // previous copy of itself.
  2822 + if (other_afdh->hasAcroForm() &&
  2823 + ((! this_file) || (! first_copy_from_orig)))
  2824 + {
  2825 + if (! this_file)
  2826 + {
  2827 + QTC::TC("qpdf", "qpdf copy fields not this file");
  2828 + }
  2829 + else if (! first_copy_from_orig)
  2830 + {
  2831 + QTC::TC("qpdf", "qpdf copy fields non-first from orig");
  2832 + }
  2833 + try
  2834 + {
  2835 + this_afdh->fixCopiedAnnotations(
  2836 + new_page, to_copy.getObjectHandle(), *other_afdh,
  2837 + &referenced_fields);
  2838 + }
  2839 + catch (std::exception& e)
  2840 + {
  2841 + pdf.warn(
  2842 + QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(),
  2843 + "", 0, "Exception caught while fixing copied"
  2844 + " annotations. This may be a qpdf bug. " +
  2845 + std::string("Exception: ") + e.what()));
  2846 + }
  2847 + }
  2848 + }
  2849 + if (page_data.qpdf->anyWarnings())
  2850 + {
  2851 + warnings = true;
  2852 + }
  2853 + if (cis)
  2854 + {
  2855 + cis->stayOpen(false);
  2856 + }
  2857 + }
  2858 + if (any_page_labels)
  2859 + {
  2860 + QPDFObjectHandle page_labels =
  2861 + QPDFObjectHandle::newDictionary();
  2862 + page_labels.replaceKey(
  2863 + "/Nums", QPDFObjectHandle::newArray(new_labels));
  2864 + pdf.getRoot().replaceKey("/PageLabels", page_labels);
  2865 + }
  2866 +
  2867 + // Delete page objects for unused page in primary. This prevents
  2868 + // those objects from being preserved by being referred to from
  2869 + // other places, such as the outlines dictionary. Also make sure
  2870 + // we keep form fields from pages we preserved.
  2871 + for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno)
  2872 + {
  2873 + auto page = orig_pages.at(pageno);
  2874 + if (selected_from_orig.count(QIntC::to_int(pageno)))
  2875 + {
  2876 + for (auto field: this_afdh->getFormFieldsForPage(page))
  2877 + {
  2878 + QTC::TC("qpdf", "qpdf pages keeping field from original");
  2879 + referenced_fields.insert(field.getObjectHandle().getObjGen());
  2880 + }
  2881 + }
  2882 + else
  2883 + {
  2884 + pdf.replaceObject(
  2885 + page.getObjectHandle().getObjGen(),
  2886 + QPDFObjectHandle::newNull());
  2887 + }
  2888 + }
  2889 + // Remove unreferenced form fields
  2890 + if (this_afdh->hasAcroForm())
  2891 + {
  2892 + auto acroform = pdf.getRoot().getKey("/AcroForm");
  2893 + auto fields = acroform.getKey("/Fields");
  2894 + if (fields.isArray())
  2895 + {
  2896 + auto new_fields = QPDFObjectHandle::newArray();
  2897 + if (fields.isIndirect())
  2898 + {
  2899 + new_fields = pdf.makeIndirectObject(new_fields);
  2900 + }
  2901 + for (auto const& field: fields.aitems())
  2902 + {
  2903 + if (referenced_fields.count(field.getObjGen()))
  2904 + {
  2905 + new_fields.appendItem(field);
  2906 + }
  2907 + }
  2908 + if (new_fields.getArrayNItems() > 0)
  2909 + {
  2910 + QTC::TC("qpdf", "qpdf keep some fields in pages");
  2911 + acroform.replaceKey("/Fields", new_fields);
  2912 + }
  2913 + else
  2914 + {
  2915 + QTC::TC("qpdf", "qpdf no more fields in pages");
  2916 + pdf.getRoot().removeKey("/AcroForm");
  2917 + }
  2918 + }
  2919 + }
  2920 +}
  2921 +
  2922 +static void handle_rotations(QPDF& pdf, QPDFJob& o)
  2923 +{
  2924 + QPDFPageDocumentHelper dh(pdf);
  2925 + std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
  2926 + int npages = QIntC::to_int(pages.size());
  2927 + for (std::map<std::string, QPDFJob::RotationSpec>::iterator iter =
  2928 + o.rotations.begin();
  2929 + iter != o.rotations.end(); ++iter)
  2930 + {
  2931 + std::string const& range = (*iter).first;
  2932 + QPDFJob::RotationSpec const& rspec = (*iter).second;
  2933 + // range has been previously validated
  2934 + std::vector<int> to_rotate =
  2935 + QUtil::parse_numrange(range.c_str(), npages);
  2936 + for (std::vector<int>::iterator i2 = to_rotate.begin();
  2937 + i2 != to_rotate.end(); ++i2)
  2938 + {
  2939 + int pageno = *i2 - 1;
  2940 + if ((pageno >= 0) && (pageno < npages))
  2941 + {
  2942 + pages.at(QIntC::to_size(pageno)).rotatePage(
  2943 + rspec.angle, rspec.relative);
  2944 + }
  2945 + }
  2946 + }
  2947 +}
  2948 +
  2949 +static void maybe_fix_write_password(int R, QPDFJob& o, std::string& password)
  2950 +{
  2951 + switch (o.password_mode)
  2952 + {
  2953 + case QPDFJob::pm_bytes:
  2954 + QTC::TC("qpdf", "qpdf password mode bytes");
  2955 + break;
  2956 +
  2957 + case QPDFJob::pm_hex_bytes:
  2958 + QTC::TC("qpdf", "qpdf password mode hex-bytes");
  2959 + password = QUtil::hex_decode(password);
  2960 + break;
  2961 +
  2962 + case QPDFJob::pm_unicode:
  2963 + case QPDFJob::pm_auto:
  2964 + {
  2965 + bool has_8bit_chars;
  2966 + bool is_valid_utf8;
  2967 + bool is_utf16;
  2968 + QUtil::analyze_encoding(password,
  2969 + has_8bit_chars,
  2970 + is_valid_utf8,
  2971 + is_utf16);
  2972 + if (! has_8bit_chars)
  2973 + {
  2974 + return;
  2975 + }
  2976 + if (o.password_mode == QPDFJob::pm_unicode)
  2977 + {
  2978 + if (! is_valid_utf8)
  2979 + {
  2980 + QTC::TC("qpdf", "qpdf password not unicode");
  2981 + throw std::runtime_error(
  2982 + "supplied password is not valid UTF-8");
  2983 + }
  2984 + if (R < 5)
  2985 + {
  2986 + std::string encoded;
  2987 + if (! QUtil::utf8_to_pdf_doc(password, encoded))
  2988 + {
  2989 + QTC::TC("qpdf", "qpdf password not encodable");
  2990 + throw std::runtime_error(
  2991 + "supplied password cannot be encoded for"
  2992 + " 40-bit or 128-bit encryption formats");
  2993 + }
  2994 + password = encoded;
  2995 + }
  2996 + }
  2997 + else
  2998 + {
  2999 + if ((R < 5) && is_valid_utf8)
  3000 + {
  3001 + std::string encoded;
  3002 + if (QUtil::utf8_to_pdf_doc(password, encoded))
  3003 + {
  3004 + QTC::TC("qpdf", "qpdf auto-encode password");
  3005 + if (o.verbose)
  3006 + {
  3007 + std::cout
  3008 + << whoami
  3009 + << ": automatically converting Unicode"
  3010 + << " password to single-byte encoding as"
  3011 + << " required for 40-bit or 128-bit"
  3012 + << " encryption" << std::endl;
  3013 + }
  3014 + password = encoded;
  3015 + }
  3016 + else
  3017 + {
  3018 + QTC::TC("qpdf", "qpdf bytes fallback warning");
  3019 + std::cerr
  3020 + << whoami << ": WARNING: "
  3021 + << "supplied password looks like a Unicode"
  3022 + << " password with characters not allowed in"
  3023 + << " passwords for 40-bit and 128-bit encryption;"
  3024 + << " most readers will not be able to open this"
  3025 + << " file with the supplied password."
  3026 + << " (Use --password-mode=bytes to suppress this"
  3027 + << " warning and use the password anyway.)"
  3028 + << std::endl;
  3029 + }
  3030 + }
  3031 + else if ((R >= 5) && (! is_valid_utf8))
  3032 + {
  3033 + QTC::TC("qpdf", "qpdf invalid utf-8 in auto");
  3034 + throw std::runtime_error(
  3035 + "supplied password is not a valid Unicode password,"
  3036 + " which is required for 256-bit encryption; to"
  3037 + " really use this password, rerun with the"
  3038 + " --password-mode=bytes option");
  3039 + }
  3040 + }
  3041 + }
  3042 + break;
  3043 + }
  3044 +}
  3045 +
  3046 +static void set_encryption_options(QPDF& pdf, QPDFJob& o, QPDFWriter& w)
  3047 +{
  3048 + int R = 0;
  3049 + if (o.keylen == 40)
  3050 + {
  3051 + R = 2;
  3052 + }
  3053 + else if (o.keylen == 128)
  3054 + {
  3055 + if (o.force_V4 || o.cleartext_metadata || o.use_aes)
  3056 + {
  3057 + R = 4;
  3058 + }
  3059 + else
  3060 + {
  3061 + R = 3;
  3062 + }
  3063 + }
  3064 + else if (o.keylen == 256)
  3065 + {
  3066 + if (o.force_R5)
  3067 + {
  3068 + R = 5;
  3069 + }
  3070 + else
  3071 + {
  3072 + R = 6;
  3073 + }
  3074 + }
  3075 + else
  3076 + {
  3077 + throw std::logic_error("bad encryption keylen");
  3078 + }
  3079 + if ((R > 3) && (o.r3_accessibility == false))
  3080 + {
  3081 + std::cerr << whoami
  3082 + << ": -accessibility=n is ignored for modern"
  3083 + << " encryption formats" << std::endl;
  3084 + }
  3085 + maybe_fix_write_password(R, o, o.user_password);
  3086 + maybe_fix_write_password(R, o, o.owner_password);
  3087 + if ((R < 4) || ((R == 4) && (! o.use_aes)))
  3088 + {
  3089 + if (! o.allow_weak_crypto)
  3090 + {
  3091 + // Do not set exit code to EXIT_WARNING for this case as
  3092 + // this does not reflect a potential problem with the
  3093 + // input file.
  3094 + QTC::TC("qpdf", "qpdf weak crypto warning");
  3095 + std::cerr
  3096 + << whoami
  3097 + << ": writing a file with RC4, a weak cryptographic algorithm"
  3098 + << std::endl
  3099 + << "Please use 256-bit keys for better security."
  3100 + << std::endl
  3101 + << "Pass --allow-weak-crypto to suppress this warning."
  3102 + << std::endl
  3103 + << "This will become an error in a future version of qpdf."
  3104 + << std::endl;
  3105 + }
  3106 + }
  3107 + switch (R)
  3108 + {
  3109 + case 2:
  3110 + w.setR2EncryptionParameters(
  3111 + o.user_password.c_str(), o.owner_password.c_str(),
  3112 + o.r2_print, o.r2_modify, o.r2_extract, o.r2_annotate);
  3113 + break;
  3114 + case 3:
  3115 + w.setR3EncryptionParameters(
  3116 + o.user_password.c_str(), o.owner_password.c_str(),
  3117 + o.r3_accessibility, o.r3_extract,
  3118 + o.r3_assemble, o.r3_annotate_and_form,
  3119 + o.r3_form_filling, o.r3_modify_other,
  3120 + o.r3_print);
  3121 + break;
  3122 + case 4:
  3123 + w.setR4EncryptionParameters(
  3124 + o.user_password.c_str(), o.owner_password.c_str(),
  3125 + o.r3_accessibility, o.r3_extract,
  3126 + o.r3_assemble, o.r3_annotate_and_form,
  3127 + o.r3_form_filling, o.r3_modify_other,
  3128 + o.r3_print, !o.cleartext_metadata, o.use_aes);
  3129 + break;
  3130 + case 5:
  3131 + w.setR5EncryptionParameters(
  3132 + o.user_password.c_str(), o.owner_password.c_str(),
  3133 + o.r3_accessibility, o.r3_extract,
  3134 + o.r3_assemble, o.r3_annotate_and_form,
  3135 + o.r3_form_filling, o.r3_modify_other,
  3136 + o.r3_print, !o.cleartext_metadata);
  3137 + break;
  3138 + case 6:
  3139 + w.setR6EncryptionParameters(
  3140 + o.user_password.c_str(), o.owner_password.c_str(),
  3141 + o.r3_accessibility, o.r3_extract,
  3142 + o.r3_assemble, o.r3_annotate_and_form,
  3143 + o.r3_form_filling, o.r3_modify_other,
  3144 + o.r3_print, !o.cleartext_metadata);
  3145 + break;
  3146 + default:
  3147 + throw std::logic_error("bad encryption R value");
  3148 + break;
  3149 + }
  3150 +}
  3151 +
  3152 +void
  3153 +QPDFJob::setWriterOptions(QPDF& pdf, QPDFWriter& w)
  3154 +{
  3155 + QPDFJob& o = *this; // QXXXQ
  3156 + if (o.compression_level >= 0)
  3157 + {
  3158 + Pl_Flate::setCompressionLevel(o.compression_level);
  3159 + }
  3160 + if (o.qdf_mode)
  3161 + {
  3162 + w.setQDFMode(true);
  3163 + }
  3164 + if (o.preserve_unreferenced_objects)
  3165 + {
  3166 + w.setPreserveUnreferencedObjects(true);
  3167 + }
  3168 + if (o.newline_before_endstream)
  3169 + {
  3170 + w.setNewlineBeforeEndstream(true);
  3171 + }
  3172 + if (o.normalize_set)
  3173 + {
  3174 + w.setContentNormalization(o.normalize);
  3175 + }
  3176 + if (o.stream_data_set)
  3177 + {
  3178 + w.setStreamDataMode(o.stream_data_mode);
  3179 + }
  3180 + if (o.compress_streams_set)
  3181 + {
  3182 + w.setCompressStreams(o.compress_streams);
  3183 + }
  3184 + if (o.recompress_flate_set)
  3185 + {
  3186 + w.setRecompressFlate(o.recompress_flate);
  3187 + }
  3188 + if (o.decode_level_set)
  3189 + {
  3190 + w.setDecodeLevel(o.decode_level);
  3191 + }
  3192 + if (o.decrypt)
  3193 + {
  3194 + w.setPreserveEncryption(false);
  3195 + }
  3196 + if (o.deterministic_id)
  3197 + {
  3198 + w.setDeterministicID(true);
  3199 + }
  3200 + if (o.static_id)
  3201 + {
  3202 + w.setStaticID(true);
  3203 + }
  3204 + if (o.static_aes_iv)
  3205 + {
  3206 + w.setStaticAesIV(true);
  3207 + }
  3208 + if (o.suppress_original_object_id)
  3209 + {
  3210 + w.setSuppressOriginalObjectIDs(true);
  3211 + }
  3212 + if (o.copy_encryption)
  3213 + {
  3214 + PointerHolder<QPDF> encryption_pdf =
  3215 + processFile(o.encryption_file, o.encryption_file_password);
  3216 + w.copyEncryptionParameters(*encryption_pdf);
  3217 + }
  3218 + if (o.encrypt)
  3219 + {
  3220 + set_encryption_options(pdf, o, w);
  3221 + }
  3222 + if (o.linearize)
  3223 + {
  3224 + w.setLinearization(true);
  3225 + }
  3226 + if (! o.linearize_pass1.empty())
  3227 + {
  3228 + w.setLinearizationPass1Filename(o.linearize_pass1);
  3229 + }
  3230 + if (o.object_stream_set)
  3231 + {
  3232 + w.setObjectStreamMode(o.object_stream_mode);
  3233 + }
  3234 + if (! o.min_version.empty())
  3235 + {
  3236 + std::string version;
  3237 + int extension_level = 0;
  3238 + parse_version(o.min_version, version, extension_level);
  3239 + w.setMinimumPDFVersion(version, extension_level);
  3240 + }
  3241 + if (! o.force_version.empty())
  3242 + {
  3243 + std::string version;
  3244 + int extension_level = 0;
  3245 + parse_version(o.force_version, version, extension_level);
  3246 + w.forcePDFVersion(version, extension_level);
  3247 + }
  3248 + if (o.progress && o.outfilename)
  3249 + {
  3250 + w.registerProgressReporter(new ProgressReporter(o.outfilename));
  3251 + }
  3252 +}
  3253 +
  3254 +void
  3255 +QPDFJob::doSplitPages(QPDF& pdf, bool& warnings)
  3256 +{
  3257 + QPDFJob& o = *this; // QXXXQ
  3258 + // Generate output file pattern
  3259 + std::string before;
  3260 + std::string after;
  3261 + size_t len = strlen(o.outfilename);
  3262 + char* num_spot = strstr(const_cast<char*>(o.outfilename), "%d");
  3263 + if (num_spot != 0)
  3264 + {
  3265 + QTC::TC("qpdf", "qpdf split-pages %d");
  3266 + before = std::string(o.outfilename,
  3267 + QIntC::to_size(num_spot - o.outfilename));
  3268 + after = num_spot + 2;
  3269 + }
  3270 + else if ((len >= 4) &&
  3271 + (QUtil::str_compare_nocase(
  3272 + o.outfilename + len - 4, ".pdf") == 0))
  3273 + {
  3274 + QTC::TC("qpdf", "qpdf split-pages .pdf");
  3275 + before = std::string(o.outfilename, len - 4) + "-";
  3276 + after = o.outfilename + len - 4;
  3277 + }
  3278 + else
  3279 + {
  3280 + QTC::TC("qpdf", "qpdf split-pages other");
  3281 + before = std::string(o.outfilename) + "-";
  3282 + }
  3283 +
  3284 + if (should_remove_unreferenced_resources(pdf, o))
  3285 + {
  3286 + QPDFPageDocumentHelper dh(pdf);
  3287 + dh.removeUnreferencedResources();
  3288 + }
  3289 + QPDFPageLabelDocumentHelper pldh(pdf);
  3290 + QPDFAcroFormDocumentHelper afdh(pdf);
  3291 + std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
  3292 + size_t pageno_len = QUtil::uint_to_string(pages.size()).length();
  3293 + size_t num_pages = pages.size();
  3294 + for (size_t i = 0; i < num_pages; i += QIntC::to_size(o.split_pages))
  3295 + {
  3296 + size_t first = i + 1;
  3297 + size_t last = i + QIntC::to_size(o.split_pages);
  3298 + if (last > num_pages)
  3299 + {
  3300 + last = num_pages;
  3301 + }
  3302 + QPDF outpdf;
  3303 + outpdf.emptyPDF();
  3304 + PointerHolder<QPDFAcroFormDocumentHelper> out_afdh;
  3305 + if (afdh.hasAcroForm())
  3306 + {
  3307 + out_afdh = new QPDFAcroFormDocumentHelper(outpdf);
  3308 + }
  3309 + if (o.suppress_warnings)
  3310 + {
  3311 + outpdf.setSuppressWarnings(true);
  3312 + }
  3313 + for (size_t pageno = first; pageno <= last; ++pageno)
  3314 + {
  3315 + QPDFObjectHandle page = pages.at(pageno - 1);
  3316 + outpdf.addPage(page, false);
  3317 + auto new_page = added_page(outpdf, page);
  3318 + if (out_afdh.getPointer())
  3319 + {
  3320 + QTC::TC("qpdf", "qpdf copy form fields in split_pages");
  3321 + try
  3322 + {
  3323 + out_afdh->fixCopiedAnnotations(new_page, page, afdh);
  3324 + }
  3325 + catch (std::exception& e)
  3326 + {
  3327 + pdf.warn(
  3328 + QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(),
  3329 + "", 0, "Exception caught while fixing copied"
  3330 + " annotations. This may be a qpdf bug." +
  3331 + std::string("Exception: ") + e.what()));
  3332 + }
  3333 + }
  3334 + }
  3335 + if (pldh.hasPageLabels())
  3336 + {
  3337 + std::vector<QPDFObjectHandle> labels;
  3338 + pldh.getLabelsForPageRange(
  3339 + QIntC::to_longlong(first - 1),
  3340 + QIntC::to_longlong(last - 1),
  3341 + 0, labels);
  3342 + QPDFObjectHandle page_labels =
  3343 + QPDFObjectHandle::newDictionary();
  3344 + page_labels.replaceKey(
  3345 + "/Nums", QPDFObjectHandle::newArray(labels));
  3346 + outpdf.getRoot().replaceKey("/PageLabels", page_labels);
  3347 + }
  3348 + std::string page_range =
  3349 + QUtil::uint_to_string(first, QIntC::to_int(pageno_len));
  3350 + if (o.split_pages > 1)
  3351 + {
  3352 + page_range += "-" +
  3353 + QUtil::uint_to_string(last, QIntC::to_int(pageno_len));
  3354 + }
  3355 + std::string outfile = before + page_range + after;
  3356 + if (QUtil::same_file(o.infilename, outfile.c_str()))
  3357 + {
  3358 + std::cerr << whoami
  3359 + << ": split pages would overwrite input file with "
  3360 + << outfile << std::endl;
  3361 + exit(EXIT_ERROR); // QXXXQ
  3362 + }
  3363 + QPDFWriter w(outpdf, outfile.c_str());
  3364 + setWriterOptions(outpdf, w);
  3365 + w.write();
  3366 + if (o.verbose)
  3367 + {
  3368 + std::cout << whoami << ": wrote file " << outfile << std::endl;
  3369 + }
  3370 + if (outpdf.anyWarnings())
  3371 + {
  3372 + warnings = true;
  3373 + }
  3374 + }
  3375 +}
  3376 +
  3377 +void
  3378 +QPDFJob::writeOutfile(QPDF& pdf)
  3379 +{
  3380 + QPDFJob& o = *this; // QXXXQ
  3381 + std::string temp_out;
  3382 + if (o.replace_input)
  3383 + {
  3384 + // Append but don't prepend to the path to generate a
  3385 + // temporary name. This saves us from having to split the path
  3386 + // by directory and non-directory.
  3387 + temp_out = std::string(o.infilename) + ".~qpdf-temp#";
  3388 + // o.outfilename will be restored to 0 before temp_out
  3389 + // goes out of scope.
  3390 + o.outfilename = temp_out.c_str();
  3391 + }
  3392 + else if (strcmp(o.outfilename, "-") == 0)
  3393 + {
  3394 + o.outfilename = 0;
  3395 + }
  3396 + {
  3397 + // Private scope so QPDFWriter will close the output file
  3398 + QPDFWriter w(pdf, o.outfilename);
  3399 + setWriterOptions(pdf, w);
  3400 + w.write();
  3401 + }
  3402 + if (o.verbose && o.outfilename)
  3403 + {
  3404 + std::cout << whoami << ": wrote file "
  3405 + << o.outfilename << std::endl;
  3406 + }
  3407 + if (o.replace_input)
  3408 + {
  3409 + o.outfilename = 0;
  3410 + }
  3411 + if (o.replace_input)
  3412 + {
  3413 + // We must close the input before we can rename files
  3414 + pdf.closeInputSource();
  3415 + std::string backup = std::string(o.infilename) + ".~qpdf-orig";
  3416 + bool warnings = pdf.anyWarnings();
  3417 + if (! warnings)
  3418 + {
  3419 + backup.append(1, '#');
  3420 + }
  3421 + QUtil::rename_file(o.infilename, backup.c_str());
  3422 + QUtil::rename_file(temp_out.c_str(), o.infilename);
  3423 + if (warnings)
  3424 + {
  3425 + std::cerr << whoami
  3426 + << ": there are warnings; original file kept in "
  3427 + << backup << std::endl;
  3428 + }
  3429 + else
  3430 + {
  3431 + try
  3432 + {
  3433 + QUtil::remove_file(backup.c_str());
  3434 + }
  3435 + catch (QPDFSystemError& e)
  3436 + {
  3437 + std::cerr
  3438 + << whoami
  3439 + << ": unable to delete original file ("
  3440 + << e.what() << ");"
  3441 + << " original file left in " << backup
  3442 + << ", but the input was successfully replaced"
  3443 + << std::endl;
  3444 + }
  3445 + }
  3446 + }
  3447 +}
  3448 +
  3449 +void
  3450 +QPDFJob::run()
  3451 +{
  3452 + QPDFJob& o = *this; // QXXXQ
  3453 + PointerHolder<QPDF> pdf_ph;
  3454 + try
  3455 + {
  3456 + pdf_ph = processFile(o.infilename, o.password);
  3457 + }
  3458 + catch (QPDFExc& e)
  3459 + {
  3460 + if ((e.getErrorCode() == qpdf_e_password) &&
  3461 + (o.check_is_encrypted || o.check_requires_password))
  3462 + {
  3463 + // Allow --is-encrypted and --requires-password to
  3464 + // work when an incorrect password is supplied.
  3465 + this->m->encryption_status =
  3466 + qpdf_es_encrypted |
  3467 + qpdf_es_password_incorrect;
  3468 + return;
  3469 + }
  3470 + throw e;
  3471 + }
  3472 + QPDF& pdf = *pdf_ph;
  3473 + if (pdf.isEncrypted())
  3474 + {
  3475 + this->m->encryption_status = qpdf_es_encrypted;
  3476 + }
  3477 +
  3478 + if (o.check_is_encrypted || o.check_requires_password)
  3479 + {
  3480 + return;
  3481 + }
  3482 + bool other_warnings = false;
  3483 + std::vector<PointerHolder<QPDF>> page_heap;
  3484 + if (! o.page_specs.empty())
  3485 + {
  3486 + handle_page_specs(pdf, o, other_warnings, page_heap);
  3487 + }
  3488 + if (! o.rotations.empty())
  3489 + {
  3490 + handle_rotations(pdf, o);
  3491 + }
  3492 + handleUnderOverlay(pdf);
  3493 + handleTransformations(pdf);
  3494 +
  3495 + this->m->creates_output = ((o.outfilename != nullptr) || o.replace_input);
  3496 + if (! this->m->creates_output)
  3497 + {
  3498 + do_inspection(pdf, o);
  3499 + }
  3500 + else if (o.split_pages)
  3501 + {
  3502 + doSplitPages(pdf, other_warnings);
  3503 + }
  3504 + else
  3505 + {
  3506 + writeOutfile(pdf);
  3507 + }
  3508 + if (! pdf.getWarnings().empty())
  3509 + {
  3510 + this->m->warnings = true;
  3511 + }
  3512 +}
  3513 +
  3514 +bool
  3515 +QPDFJob::hasWarnings()
  3516 +{
  3517 + return this->m->warnings;
  3518 +}
  3519 +
  3520 +bool
  3521 +QPDFJob::createsOutput()
  3522 +{
  3523 + return this->m->creates_output;
  3524 +}
  3525 +
  3526 +unsigned long
  3527 +QPDFJob::getEncryptionStatus()
  3528 +{
  3529 + return this->m->encryption_status;
  3530 +}
  3531 +
  3532 +bool
  3533 +QPDFJob::suppressWarnings()
  3534 +{
  3535 + return this->suppress_warnings;
  3536 +}
  3537 +
  3538 +bool
  3539 +QPDFJob::checkRequiresPassword()
  3540 +{
  3541 + return this->check_requires_password;
  3542 +}
  3543 +
  3544 +bool
  3545 +QPDFJob::checkIsEncrypted()
  3546 +{
  3547 + return this->check_is_encrypted;
  3548 +}
... ...
libqpdf/build.mk
... ... @@ -64,6 +64,7 @@ SRCS_libqpdf = \
64 64 libqpdf/QPDFExc.cc \
65 65 libqpdf/QPDFFileSpecObjectHelper.cc \
66 66 libqpdf/QPDFFormFieldObjectHelper.cc \
  67 + libqpdf/QPDFJob.cc \
67 68 libqpdf/QPDFMatrix.cc \
68 69 libqpdf/QPDFNameTreeObjectHelper.cc \
69 70 libqpdf/QPDFNumberTreeObjectHelper.cc \
... ...
qpdf/qpdf.cc
  1 +// QXXXQ update headers
  2 +
1 3 #include <iostream>
2 4 #include <string.h>
3 5 #include <stdlib.h>
4   -#include <fcntl.h>
5   -#include <stdio.h>
  6 +//#include <fcntl.h>
  7 +#include <cstdio>
6 8 #include <ctype.h>
7 9 #include <memory>
8 10  
9 11 #include <qpdf/QUtil.hh>
10 12 #include <qpdf/QTC.hh>
11   -#include <qpdf/ClosedFileInputSource.hh>
12   -#include <qpdf/FileInputSource.hh>
13   -#include <qpdf/Pl_StdioFile.hh>
14   -#include <qpdf/Pl_Discard.hh>
15   -#include <qpdf/Pl_DCT.hh>
16   -#include <qpdf/Pl_Count.hh>
17   -#include <qpdf/Pl_Flate.hh>
18   -#include <qpdf/PointerHolder.hh>
19   -
20   -#include <qpdf/QPDF.hh>
21   -#include <qpdf/QPDFPageDocumentHelper.hh>
22   -#include <qpdf/QPDFPageObjectHelper.hh>
23   -#include <qpdf/QPDFPageLabelDocumentHelper.hh>
24   -#include <qpdf/QPDFOutlineDocumentHelper.hh>
25   -#include <qpdf/QPDFAcroFormDocumentHelper.hh>
26   -#include <qpdf/QPDFExc.hh>
27   -#include <qpdf/QPDFSystemError.hh>
28 13 #include <qpdf/QPDFCryptoProvider.hh>
29   -#include <qpdf/QPDFEmbeddedFileDocumentHelper.hh>
30 14 #include <qpdf/QPDFArgParser.hh>
31   -
32   -#include <qpdf/QPDFWriter.hh>
  15 +#include <qpdf/QPDFJob.hh>
33 16 #include <qpdf/QIntC.hh>
34 17  
35 18 static int constexpr EXIT_ERROR = 2;
... ... @@ -43,708 +26,10 @@ static char const* whoami = 0;
43 26  
44 27 static std::string expected_version = "10.5.0";
45 28  
// One page-selection entry: a file name, the password to use for it
// (may be null), and a page range string.
//
// password and range are stored as the raw pointers passed in; the
// struct does not copy or own the character data.
struct PageSpec
{
    std::string filename;
    char const* password;
    char const* range;

    PageSpec(std::string const& filename,
             char const* password,
             char const* range) :
        filename{filename},
        password{password},
        range{range}
    {
    }
};
61   -
// A rotation request: an angle plus a flag saying whether the angle
// is relative. Defaults to angle 0, not relative.
struct RotationSpec
{
    int angle;
    bool relative;

    RotationSpec(int angle = 0, bool relative = false) :
        angle{angle},
        relative{relative}
    {
    }
};
73   -
// Password mode choices. ArgParser::argPasswordMode maps the
// parameter values "bytes", "hex-bytes", "unicode" and "auto" to
// these, in this order.
enum password_mode_e
{
    pm_bytes,
    pm_hex_bytes,
    pm_unicode,
    pm_auto
};
75   -
76   -struct UnderOverlay
77   -{
78   - UnderOverlay(char const* which) :
79   - which(which),
80   - filename(0),
81   - password(0),
82   - to_nr("1-z"),
83   - from_nr("1-z"),
84   - repeat_nr("")
85   - {
86   - }
87   -
88   - std::string which;
89   - char const* filename;
90   - char const* password;
91   - char const* to_nr;
92   - char const* from_nr;
93   - char const* repeat_nr;
94   - PointerHolder<QPDF> pdf;
95   - std::vector<int> to_pagenos;
96   - std::vector<int> from_pagenos;
97   - std::vector<int> repeat_pagenos;
98   -};
99   -
// Description of one attachment to add. All string fields start out
// empty and `replace` starts out false.
struct AddAttachment
{
    std::string path;
    std::string key;
    std::string filename;
    std::string creationdate;
    std::string moddate;
    std::string mimetype;
    std::string description;
    bool replace;

    AddAttachment() :
        replace{false}
    {
    }
};
116   -
// Description of one "copy attachments from another file" request.
// All three fields are default-constructed (empty) strings.
struct CopyAttachmentFrom
{
    std::string path;     // presumably the source file -- confirm against the option handlers
    std::string password; // presumably the password for that file -- confirm
    std::string prefix;   // meaning not visible here; see the code that consumes it
};
123   -
124   -
// Choices for removing unreferenced page resources.
// ArgParser::argRemoveUnreferencedResources maps the parameter values
// "auto", "yes" and "no" to these, in this order.
enum remove_unref_e
{
    re_auto,
    re_yes,
    re_no
};
126   -
127   -struct Options
128   -{
129   - Options() :
130   - password(0),
131   - linearize(false),
132   - decrypt(false),
133   - split_pages(0),
134   - verbose(false),
135   - progress(false),
136   - suppress_warnings(false),
137   - copy_encryption(false),
138   - encryption_file(0),
139   - encryption_file_password(0),
140   - encrypt(false),
141   - password_is_hex_key(false),
142   - suppress_password_recovery(false),
143   - password_mode(pm_auto),
144   - allow_insecure(false),
145   - allow_weak_crypto(false),
146   - keylen(0),
147   - r2_print(true),
148   - r2_modify(true),
149   - r2_extract(true),
150   - r2_annotate(true),
151   - r3_accessibility(true),
152   - r3_extract(true),
153   - r3_assemble(true),
154   - r3_annotate_and_form(true),
155   - r3_form_filling(true),
156   - r3_modify_other(true),
157   - r3_print(qpdf_r3p_full),
158   - force_V4(false),
159   - force_R5(false),
160   - cleartext_metadata(false),
161   - use_aes(false),
162   - stream_data_set(false),
163   - stream_data_mode(qpdf_s_compress),
164   - compress_streams(true),
165   - compress_streams_set(false),
166   - recompress_flate(false),
167   - recompress_flate_set(false),
168   - compression_level(-1),
169   - decode_level(qpdf_dl_generalized),
170   - decode_level_set(false),
171   - normalize_set(false),
172   - normalize(false),
173   - suppress_recovery(false),
174   - object_stream_set(false),
175   - object_stream_mode(qpdf_o_preserve),
176   - ignore_xref_streams(false),
177   - qdf_mode(false),
178   - preserve_unreferenced_objects(false),
179   - remove_unreferenced_page_resources(re_auto),
180   - keep_files_open(true),
181   - keep_files_open_set(false),
182   - keep_files_open_threshold(200), // default known in help and docs
183   - newline_before_endstream(false),
184   - coalesce_contents(false),
185   - flatten_annotations(false),
186   - flatten_annotations_required(0),
187   - flatten_annotations_forbidden(an_invisible | an_hidden),
188   - generate_appearances(false),
189   - show_npages(false),
190   - deterministic_id(false),
191   - static_id(false),
192   - static_aes_iv(false),
193   - suppress_original_object_id(false),
194   - show_encryption(false),
195   - show_encryption_key(false),
196   - check_linearization(false),
197   - show_linearization(false),
198   - show_xref(false),
199   - show_trailer(false),
200   - show_obj(0),
201   - show_gen(0),
202   - show_raw_stream_data(false),
203   - show_filtered_stream_data(false),
204   - show_pages(false),
205   - show_page_images(false),
206   - collate(0),
207   - flatten_rotation(false),
208   - list_attachments(false),
209   - json(false),
210   - check(false),
211   - optimize_images(false),
212   - externalize_inline_images(false),
213   - keep_inline_images(false),
214   - remove_page_labels(false),
215   - oi_min_width(128), // Default values for these
216   - oi_min_height(128), // oi flags are in --help
217   - oi_min_area(16384), // and in the manual.
218   - ii_min_bytes(1024), //
219   - underlay("underlay"),
220   - overlay("overlay"),
221   - under_overlay(0),
222   - require_outfile(true),
223   - replace_input(false),
224   - check_is_encrypted(false),
225   - check_requires_password(false),
226   - infilename(0),
227   - outfilename(0)
228   - {
229   - }
230   -
231   - char const* password;
232   - std::shared_ptr<char> password_alloc;
233   - bool linearize;
234   - bool decrypt;
235   - int split_pages;
236   - bool verbose;
237   - bool progress;
238   - bool suppress_warnings;
239   - bool copy_encryption;
240   - char const* encryption_file;
241   - char const* encryption_file_password;
242   - bool encrypt;
243   - bool password_is_hex_key;
244   - bool suppress_password_recovery;
245   - password_mode_e password_mode;
246   - bool allow_insecure;
247   - bool allow_weak_crypto;
248   - std::string user_password;
249   - std::string owner_password;
250   - int keylen;
251   - bool r2_print;
252   - bool r2_modify;
253   - bool r2_extract;
254   - bool r2_annotate;
255   - bool r3_accessibility;
256   - bool r3_extract;
257   - bool r3_assemble;
258   - bool r3_annotate_and_form;
259   - bool r3_form_filling;
260   - bool r3_modify_other;
261   - qpdf_r3_print_e r3_print;
262   - bool force_V4;
263   - bool force_R5;
264   - bool cleartext_metadata;
265   - bool use_aes;
266   - bool stream_data_set;
267   - qpdf_stream_data_e stream_data_mode;
268   - bool compress_streams;
269   - bool compress_streams_set;
270   - bool recompress_flate;
271   - bool recompress_flate_set;
272   - int compression_level;
273   - qpdf_stream_decode_level_e decode_level;
274   - bool decode_level_set;
275   - bool normalize_set;
276   - bool normalize;
277   - bool suppress_recovery;
278   - bool object_stream_set;
279   - qpdf_object_stream_e object_stream_mode;
280   - bool ignore_xref_streams;
281   - bool qdf_mode;
282   - bool preserve_unreferenced_objects;
283   - remove_unref_e remove_unreferenced_page_resources;
284   - bool keep_files_open;
285   - bool keep_files_open_set;
286   - size_t keep_files_open_threshold;
287   - bool newline_before_endstream;
288   - std::string linearize_pass1;
289   - bool coalesce_contents;
290   - bool flatten_annotations;
291   - int flatten_annotations_required;
292   - int flatten_annotations_forbidden;
293   - bool generate_appearances;
294   - std::string min_version;
295   - std::string force_version;
296   - bool show_npages;
297   - bool deterministic_id;
298   - bool static_id;
299   - bool static_aes_iv;
300   - bool suppress_original_object_id;
301   - bool show_encryption;
302   - bool show_encryption_key;
303   - bool check_linearization;
304   - bool show_linearization;
305   - bool show_xref;
306   - bool show_trailer;
307   - int show_obj;
308   - int show_gen;
309   - bool show_raw_stream_data;
310   - bool show_filtered_stream_data;
311   - bool show_pages;
312   - bool show_page_images;
313   - size_t collate;
314   - bool flatten_rotation;
315   - bool list_attachments;
316   - std::string attachment_to_show;
317   - std::list<std::string> attachments_to_remove;
318   - std::list<AddAttachment> attachments_to_add;
319   - std::list<CopyAttachmentFrom> attachments_to_copy;
320   - bool json;
321   - std::set<std::string> json_keys;
322   - std::set<std::string> json_objects;
323   - bool check;
324   - bool optimize_images;
325   - bool externalize_inline_images;
326   - bool keep_inline_images;
327   - bool remove_page_labels;
328   - size_t oi_min_width;
329   - size_t oi_min_height;
330   - size_t oi_min_area;
331   - size_t ii_min_bytes;
332   - UnderOverlay underlay;
333   - UnderOverlay overlay;
334   - UnderOverlay* under_overlay;
335   - std::vector<PageSpec> page_specs;
336   - std::map<std::string, RotationSpec> rotations;
337   - bool require_outfile;
338   - bool replace_input;
339   - bool check_is_encrypted;
340   - bool check_requires_password;
341   - char const* infilename;
342   - char const* outfilename;
343   -};
344   -
345   -struct QPDFPageData
346   -{
347   - QPDFPageData(std::string const& filename, QPDF* qpdf, char const* range);
348   - QPDFPageData(QPDFPageData const& other, int page);
349   -
350   - std::string filename;
351   - QPDF* qpdf;
352   - std::vector<QPDFObjectHandle> orig_pages;
353   - std::vector<int> selected_pages;
354   -};
355   -
356   -class DiscardContents: public QPDFObjectHandle::ParserCallbacks
357   -{
358   - public:
359   - virtual ~DiscardContents() {}
360   - virtual void handleObject(QPDFObjectHandle) {}
361   - virtual void handleEOF() {}
362   -};
363   -
364   -class ProgressReporter: public QPDFWriter::ProgressReporter
365   -{
366   - public:
367   - ProgressReporter(char const* filename) :
368   - filename(filename)
369   - {
370   - }
371   - virtual ~ProgressReporter()
372   - {
373   - }
374   -
375   - virtual void reportProgress(int);
376   - private:
377   - std::string filename;
378   -};
379   -
380   -void
381   -ProgressReporter::reportProgress(int percentage)
382   -{
383   - std::cout << whoami << ": " << filename << ": write progress: "
384   - << percentage << "%" << std::endl;
385   -}
386   -
387   -static JSON json_schema(std::set<std::string>* keys = 0)
388   -{
389   - // Style: use all lower-case keys with no dashes or underscores.
390   - // Choose array or dictionary based on indexing. For example, we
391   - // use a dictionary for objects because we want to index by object
392   - // ID and an array for pages because we want to index by position.
393   - // The pages in the pages array contain references back to the
394   - // original object, which can be resolved in the objects
395   - // dictionary. When a PDF construct that maps back to an original
396   - // object is represented separately, use "object" as the key that
397   - // references the original object.
398   -
399   - // This JSON object doubles as a schema and as documentation for
400   - // our JSON output. Any schema mismatch is a bug in qpdf. This
401   - // helps to enforce our policy of consistently providing a known
402   - // structure where every documented key will always be present,
403   - // which makes it easier to consume our JSON. This is discussed in
404   - // more depth in the manual.
405   - JSON schema = JSON::makeDictionary();
406   - schema.addDictionaryMember(
407   - "version", JSON::makeString(
408   - "JSON format serial number; increased for non-compatible changes"));
409   - JSON j_params = schema.addDictionaryMember(
410   - "parameters", JSON::makeDictionary());
411   - j_params.addDictionaryMember(
412   - "decodelevel", JSON::makeString(
413   - "decode level used to determine stream filterability"));
414   -
415   - bool all_keys = ((keys == 0) || keys->empty());
416   -
417   - // The list of selectable top-level keys id duplicated in three
418   - // places: json_schema, do_json, and initOptionTable.
419   - if (all_keys || keys->count("objects"))
420   - {
421   - schema.addDictionaryMember(
422   - "objects", JSON::makeString(
423   - "dictionary of original objects;"
424   - " keys are 'trailer' or 'n n R'"));
425   - }
426   - if (all_keys || keys->count("objectinfo"))
427   - {
428   - JSON objectinfo = schema.addDictionaryMember(
429   - "objectinfo", JSON::makeDictionary());
430   - JSON details = objectinfo.addDictionaryMember(
431   - "<object-id>", JSON::makeDictionary());
432   - JSON stream = details.addDictionaryMember(
433   - "stream", JSON::makeDictionary());
434   - stream.addDictionaryMember(
435   - "is",
436   - JSON::makeString("whether the object is a stream"));
437   - stream.addDictionaryMember(
438   - "length",
439   - JSON::makeString("if stream, its length, otherwise null"));
440   - stream.addDictionaryMember(
441   - "filter",
442   - JSON::makeString("if stream, its filters, otherwise null"));
443   - }
444   - if (all_keys || keys->count("pages"))
445   - {
446   - JSON page = schema.addDictionaryMember("pages", JSON::makeArray()).
447   - addArrayElement(JSON::makeDictionary());
448   - page.addDictionaryMember(
449   - "object",
450   - JSON::makeString("reference to original page object"));
451   - JSON image = page.addDictionaryMember("images", JSON::makeArray()).
452   - addArrayElement(JSON::makeDictionary());
453   - image.addDictionaryMember(
454   - "name",
455   - JSON::makeString("name of image in XObject table"));
456   - image.addDictionaryMember(
457   - "object",
458   - JSON::makeString("reference to image stream"));
459   - image.addDictionaryMember(
460   - "width",
461   - JSON::makeString("image width"));
462   - image.addDictionaryMember(
463   - "height",
464   - JSON::makeString("image height"));
465   - image.addDictionaryMember(
466   - "colorspace",
467   - JSON::makeString("color space"));
468   - image.addDictionaryMember(
469   - "bitspercomponent",
470   - JSON::makeString("bits per component"));
471   - image.addDictionaryMember("filter", JSON::makeArray()).
472   - addArrayElement(
473   - JSON::makeString("filters applied to image data"));
474   - image.addDictionaryMember("decodeparms", JSON::makeArray()).
475   - addArrayElement(
476   - JSON::makeString("decode parameters for image data"));
477   - image.addDictionaryMember(
478   - "filterable",
479   - JSON::makeString("whether image data can be decoded"
480   - " using the decode level qpdf was invoked with"));
481   - page.addDictionaryMember("contents", JSON::makeArray()).
482   - addArrayElement(
483   - JSON::makeString("reference to each content stream"));
484   - page.addDictionaryMember(
485   - "label",
486   - JSON::makeString("page label dictionary, or null if none"));
487   - JSON outline = page.addDictionaryMember("outlines", JSON::makeArray()).
488   - addArrayElement(JSON::makeDictionary());
489   - outline.addDictionaryMember(
490   - "object",
491   - JSON::makeString("reference to outline that targets this page"));
492   - outline.addDictionaryMember(
493   - "title",
494   - JSON::makeString("outline title"));
495   - outline.addDictionaryMember(
496   - "dest",
497   - JSON::makeString("outline destination dictionary"));
498   - page.addDictionaryMember(
499   - "pageposfrom1",
500   - JSON::makeString("position of page in document numbering from 1"));
501   - }
502   - if (all_keys || keys->count("pagelabels"))
503   - {
504   - JSON labels = schema.addDictionaryMember(
505   - "pagelabels", JSON::makeArray()).
506   - addArrayElement(JSON::makeDictionary());
507   - labels.addDictionaryMember(
508   - "index",
509   - JSON::makeString("starting page position starting from zero"));
510   - labels.addDictionaryMember(
511   - "label",
512   - JSON::makeString("page label dictionary"));
513   - }
514   - if (all_keys || keys->count("outlines"))
515   - {
516   - JSON outlines = schema.addDictionaryMember(
517   - "outlines", JSON::makeArray()).
518   - addArrayElement(JSON::makeDictionary());
519   - outlines.addDictionaryMember(
520   - "object",
521   - JSON::makeString("reference to this outline"));
522   - outlines.addDictionaryMember(
523   - "title",
524   - JSON::makeString("outline title"));
525   - outlines.addDictionaryMember(
526   - "dest",
527   - JSON::makeString("outline destination dictionary"));
528   - outlines.addDictionaryMember(
529   - "kids",
530   - JSON::makeString("array of descendent outlines"));
531   - outlines.addDictionaryMember(
532   - "open",
533   - JSON::makeString("whether the outline is displayed expanded"));
534   - outlines.addDictionaryMember(
535   - "destpageposfrom1",
536   - JSON::makeString("position of destination page in document"
537   - " numbered from 1; null if not known"));
538   - }
539   - if (all_keys || keys->count("acroform"))
540   - {
541   - JSON acroform = schema.addDictionaryMember(
542   - "acroform", JSON::makeDictionary());
543   - acroform.addDictionaryMember(
544   - "hasacroform",
545   - JSON::makeString("whether the document has interactive forms"));
546   - acroform.addDictionaryMember(
547   - "needappearances",
548   - JSON::makeString("whether the form fields' appearance"
549   - " streams need to be regenerated"));
550   - JSON fields = acroform.addDictionaryMember(
551   - "fields", JSON::makeArray()).
552   - addArrayElement(JSON::makeDictionary());
553   - fields.addDictionaryMember(
554   - "object",
555   - JSON::makeString("reference to this form field"));
556   - fields.addDictionaryMember(
557   - "parent",
558   - JSON::makeString("reference to this field's parent"));
559   - fields.addDictionaryMember(
560   - "pageposfrom1",
561   - JSON::makeString("position of containing page numbered from 1"));
562   - fields.addDictionaryMember(
563   - "fieldtype",
564   - JSON::makeString("field type"));
565   - fields.addDictionaryMember(
566   - "fieldflags",
567   - JSON::makeString(
568   - "form field flags from /Ff --"
569   - " see pdf_form_field_flag_e in qpdf/Constants.h"));
570   - fields.addDictionaryMember(
571   - "fullname",
572   - JSON::makeString("full name of field"));
573   - fields.addDictionaryMember(
574   - "partialname",
575   - JSON::makeString("partial name of field"));
576   - fields.addDictionaryMember(
577   - "alternativename",
578   - JSON::makeString(
579   - "alternative name of field --"
580   - " this is the one usually shown to users"));
581   - fields.addDictionaryMember(
582   - "mappingname",
583   - JSON::makeString("mapping name of field"));
584   - fields.addDictionaryMember(
585   - "value",
586   - JSON::makeString("value of field"));
587   - fields.addDictionaryMember(
588   - "defaultvalue",
589   - JSON::makeString("default value of field"));
590   - fields.addDictionaryMember(
591   - "quadding",
592   - JSON::makeString(
593   - "field quadding --"
594   - " number indicating left, center, or right"));
595   - fields.addDictionaryMember(
596   - "ischeckbox",
597   - JSON::makeString("whether field is a checkbox"));
598   - fields.addDictionaryMember(
599   - "isradiobutton",
600   - JSON::makeString("whether field is a radio button --"
601   - " buttons in a single group share a parent"));
602   - fields.addDictionaryMember(
603   - "ischoice",
604   - JSON::makeString("whether field is a list, combo, or dropdown"));
605   - fields.addDictionaryMember(
606   - "istext",
607   - JSON::makeString("whether field is a text field"));
608   - JSON j_choices = fields.addDictionaryMember(
609   - "choices",
610   - JSON::makeString("for choices fields, the list of"
611   - " choices presented to the user"));
612   - JSON annotation = fields.addDictionaryMember(
613   - "annotation", JSON::makeDictionary());
614   - annotation.addDictionaryMember(
615   - "object",
616   - JSON::makeString("reference to the annotation object"));
617   - annotation.addDictionaryMember(
618   - "appearancestate",
619   - JSON::makeString("appearance state --"
620   - " can be used to determine value for"
621   - " checkboxes and radio buttons"));
622   - annotation.addDictionaryMember(
623   - "annotationflags",
624   - JSON::makeString(
625   - "annotation flags from /F --"
626   - " see pdf_annotation_flag_e in qpdf/Constants.h"));
627   - }
628   - if (all_keys || keys->count("encrypt"))
629   - {
630   - JSON encrypt = schema.addDictionaryMember(
631   - "encrypt", JSON::makeDictionary());
632   - encrypt.addDictionaryMember(
633   - "encrypted",
634   - JSON::makeString("whether the document is encrypted"));
635   - encrypt.addDictionaryMember(
636   - "userpasswordmatched",
637   - JSON::makeString("whether supplied password matched user password;"
638   - " always false for non-encrypted files"));
639   - encrypt.addDictionaryMember(
640   - "ownerpasswordmatched",
641   - JSON::makeString("whether supplied password matched owner password;"
642   - " always false for non-encrypted files"));
643   - JSON capabilities = encrypt.addDictionaryMember(
644   - "capabilities", JSON::makeDictionary());
645   - capabilities.addDictionaryMember(
646   - "accessibility",
647   - JSON::makeString("allow extraction for accessibility?"));
648   - capabilities.addDictionaryMember(
649   - "extract",
650   - JSON::makeString("allow extraction?"));
651   - capabilities.addDictionaryMember(
652   - "printlow",
653   - JSON::makeString("allow low resolution printing?"));
654   - capabilities.addDictionaryMember(
655   - "printhigh",
656   - JSON::makeString("allow high resolution printing?"));
657   - capabilities.addDictionaryMember(
658   - "modifyassembly",
659   - JSON::makeString("allow modifying document assembly?"));
660   - capabilities.addDictionaryMember(
661   - "modifyforms",
662   - JSON::makeString("allow modifying forms?"));
663   - capabilities.addDictionaryMember(
664   - "moddifyannotations",
665   - JSON::makeString("allow modifying annotations?"));
666   - capabilities.addDictionaryMember(
667   - "modifyother",
668   - JSON::makeString("allow other modifications?"));
669   - capabilities.addDictionaryMember(
670   - "modify",
671   - JSON::makeString("allow all modifications?"));
672   -
673   - JSON parameters = encrypt.addDictionaryMember(
674   - "parameters", JSON::makeDictionary());
675   - parameters.addDictionaryMember(
676   - "R",
677   - JSON::makeString("R value from Encrypt dictionary"));
678   - parameters.addDictionaryMember(
679   - "V",
680   - JSON::makeString("V value from Encrypt dictionary"));
681   - parameters.addDictionaryMember(
682   - "P",
683   - JSON::makeString("P value from Encrypt dictionary"));
684   - parameters.addDictionaryMember(
685   - "bits",
686   - JSON::makeString("encryption key bit length"));
687   - parameters.addDictionaryMember(
688   - "key",
689   - JSON::makeString("encryption key; will be null"
690   - " unless --show-encryption-key was specified"));
691   - parameters.addDictionaryMember(
692   - "method",
693   - JSON::makeString("overall encryption method:"
694   - " none, mixed, RC4, AESv2, AESv3"));
695   - parameters.addDictionaryMember(
696   - "streammethod",
697   - JSON::makeString("encryption method for streams"));
698   - parameters.addDictionaryMember(
699   - "stringmethod",
700   - JSON::makeString("encryption method for string"));
701   - parameters.addDictionaryMember(
702   - "filemethod",
703   - JSON::makeString("encryption method for attachments"));
704   - }
705   - if (all_keys || keys->count("attachments"))
706   - {
707   - JSON attachments = schema.addDictionaryMember(
708   - "attachments", JSON::makeDictionary());
709   - JSON details = attachments.addDictionaryMember(
710   - "<attachment-key>", JSON::makeDictionary());
711   - details.addDictionaryMember(
712   - "filespec",
713   - JSON::makeString("object containing the file spec"));
714   - details.addDictionaryMember(
715   - "preferredname",
716   - JSON::makeString("most preferred file name"));
717   - details.addDictionaryMember(
718   - "preferredcontents",
719   - JSON::makeString("most preferred embedded file stream"));
720   - }
721   - return schema;
722   -}
723   -
724   -static void parse_object_id(std::string const& objspec,
725   - bool& trailer, int& obj, int& gen)
726   -{
727   - if (objspec == "trailer")
728   - {
729   - trailer = true;
730   - }
731   - else
732   - {
733   - trailer = false;
734   - obj = QUtil::string_to_int(objspec.c_str());
735   - size_t comma = objspec.find(',');
736   - if ((comma != std::string::npos) && (comma + 1 < objspec.length()))
737   - {
738   - gen = QUtil::string_to_int(
739   - objspec.substr(1 + comma, std::string::npos).c_str());
740   - }
741   - }
742   -}
743   -
744 29 class ArgParser
745 30 {
746 31 public:
747   - ArgParser(int argc, char* argv[], Options& o);
  32 + ArgParser(int argc, char* argv[], QPDFJob& o);
748 33 void parseOptions();
749 34  
750 35 private:
... ... @@ -888,18 +173,18 @@ class ArgParser
888 173 void usage(std::string const& message);
889 174 void initOptionTable();
890 175 void doFinalChecks();
891   - void parseUnderOverlayOptions(UnderOverlay*);
  176 + void parseUnderOverlayOptions(QPDFJob::UnderOverlay*);
892 177 void parseRotationParameter(std::string const&);
893 178 std::vector<int> parseNumrange(char const* range, int max,
894 179 bool throw_error = false);
895 180  
896 181 QPDFArgParser ap;
897   - Options& o;
  182 + QPDFJob& o;
898 183 std::vector<char*> accumulated_args;
899 184 char* pages_password;
900 185 };
901 186  
902   -ArgParser::ArgParser(int argc, char* argv[], Options& o) :
  187 +ArgParser::ArgParser(int argc, char* argv[], QPDFJob& o) :
903 188 ap(argc, argv, "QPDF_EXECUTABLE"),
904 189 o(o),
905 190 pages_password(nullptr)
... ... @@ -1745,7 +1030,7 @@ ArgParser::argJsonHelp()
1745 1030 << std::endl
1746 1031 << "be null, and others will have values that apply to unencrypted files."
1747 1032 << std::endl
1748   - << json_schema().unparse()
  1033 + << QPDFJob::json_schema().unparse()
1749 1034 << std::endl;
1750 1035 }
1751 1036  
... ... @@ -1895,19 +1180,19 @@ ArgParser::argPasswordMode(char* parameter)
1895 1180 {
1896 1181 if (strcmp(parameter, "bytes") == 0)
1897 1182 {
1898   - o.password_mode = pm_bytes;
  1183 + o.password_mode = QPDFJob::pm_bytes;
1899 1184 }
1900 1185 else if (strcmp(parameter, "hex-bytes") == 0)
1901 1186 {
1902   - o.password_mode = pm_hex_bytes;
  1187 + o.password_mode = QPDFJob::pm_hex_bytes;
1903 1188 }
1904 1189 else if (strcmp(parameter, "unicode") == 0)
1905 1190 {
1906   - o.password_mode = pm_unicode;
  1191 + o.password_mode = QPDFJob::pm_unicode;
1907 1192 }
1908 1193 else if (strcmp(parameter, "auto") == 0)
1909 1194 {
1910   - o.password_mode = pm_auto;
  1195 + o.password_mode = QPDFJob::pm_auto;
1911 1196 }
1912 1197 else
1913 1198 {
... ... @@ -2050,7 +1335,7 @@ ArgParser::argPagesPositional(char* arg)
2050 1335 {
2051 1336 range = "1-z";
2052 1337 }
2053   - o.page_specs.push_back(PageSpec(file, this->pages_password, range));
  1338 + o.page_specs.push_back(QPDFJob::PageSpec(file, this->pages_password, range));
2054 1339 this->accumulated_args.clear();
2055 1340 this->pages_password = nullptr;
2056 1341 if (next_file != nullptr)
... ... @@ -2116,14 +1401,14 @@ ArgParser::argRemoveAttachment(char* parameter)
2116 1401 void
2117 1402 ArgParser::argAddAttachment()
2118 1403 {
2119   - o.attachments_to_add.push_back(AddAttachment());
  1404 + o.attachments_to_add.push_back(QPDFJob::AddAttachment());
2120 1405 this->ap.selectOptionTable(O_ATTACHMENT);
2121 1406 }
2122 1407  
2123 1408 void
2124 1409 ArgParser::argCopyAttachments()
2125 1410 {
2126   - o.attachments_to_copy.push_back(CopyAttachmentFrom());
  1411 + o.attachments_to_copy.push_back(QPDFJob::CopyAttachmentFrom());
2127 1412 this->ap.selectOptionTable(O_COPY_ATTACHMENT);
2128 1413 }
2129 1414  
... ... @@ -2257,7 +1542,7 @@ ArgParser::argPreserveUnreferenced()
2257 1542 void
2258 1543 ArgParser::argPreserveUnreferencedResources()
2259 1544 {
2260   - o.remove_unreferenced_page_resources = re_no;
  1545 + o.remove_unreferenced_page_resources = QPDFJob::re_no;
2261 1546 }
2262 1547  
2263 1548 void
... ... @@ -2265,15 +1550,15 @@ ArgParser::argRemoveUnreferencedResources(char* parameter)
2265 1550 {
2266 1551 if (strcmp(parameter, "auto") == 0)
2267 1552 {
2268   - o.remove_unreferenced_page_resources = re_auto;
  1553 + o.remove_unreferenced_page_resources = QPDFJob::re_auto;
2269 1554 }
2270 1555 else if (strcmp(parameter, "yes") == 0)
2271 1556 {
2272   - o.remove_unreferenced_page_resources = re_yes;
  1557 + o.remove_unreferenced_page_resources = QPDFJob::re_yes;
2273 1558 }
2274 1559 else if (strcmp(parameter, "no") == 0)
2275 1560 {
2276   - o.remove_unreferenced_page_resources = re_no;
  1561 + o.remove_unreferenced_page_resources = QPDFJob::re_no;
2277 1562 }
2278 1563 else
2279 1564 {
... ... @@ -2439,7 +1724,7 @@ ArgParser::argShowXref()
2439 1724 void
2440 1725 ArgParser::argShowObject(char* parameter)
2441 1726 {
2442   - parse_object_id(parameter, o.show_trailer, o.show_obj, o.show_gen);
  1727 + QPDFJob::parse_object_id(parameter, o.show_trailer, o.show_obj, o.show_gen);
2443 1728 o.require_outfile = false;
2444 1729 }
2445 1730  
... ... @@ -2911,7 +2196,7 @@ void usageExit(std::string const& msg)
2911 2196 << "Usage: " << whoami << " [options] {infile | --empty} [page_selection_options] outfile" << std::endl
2912 2197 << "For detailed help, run " << whoami << " --help" << std::endl
2913 2198 << std::endl;
2914   - exit(EXIT_ERROR);
  2199 + exit(EXIT_ERROR); // QXXXQ
2915 2200 }
2916 2201  
2917 2202 void
... ... @@ -2928,100 +2213,6 @@ ArgParser::usage(std::string const& message)
2928 2213 }
2929 2214 }
2930 2215  
2931   -static std::string show_bool(bool v)
2932   -{
2933   - return v ? "allowed" : "not allowed";
2934   -}
2935   -
2936   -static std::string show_encryption_method(QPDF::encryption_method_e method)
2937   -{
2938   - std::string result = "unknown";
2939   - switch (method)
2940   - {
2941   - case QPDF::e_none:
2942   - result = "none";
2943   - break;
2944   - case QPDF::e_unknown:
2945   - result = "unknown";
2946   - break;
2947   - case QPDF::e_rc4:
2948   - result = "RC4";
2949   - break;
2950   - case QPDF::e_aes:
2951   - result = "AESv2";
2952   - break;
2953   - case QPDF::e_aesv3:
2954   - result = "AESv3";
2955   - break;
2956   - // no default so gcc will warn for missing case
2957   - }
2958   - return result;
2959   -}
2960   -
2961   -static void show_encryption(QPDF& pdf, Options& o)
2962   -{
2963   - // Extract /P from /Encrypt
2964   - int R = 0;
2965   - int P = 0;
2966   - int V = 0;
2967   - QPDF::encryption_method_e stream_method = QPDF::e_unknown;
2968   - QPDF::encryption_method_e string_method = QPDF::e_unknown;
2969   - QPDF::encryption_method_e file_method = QPDF::e_unknown;
2970   - if (! pdf.isEncrypted(R, P, V,
2971   - stream_method, string_method, file_method))
2972   - {
2973   - std::cout << "File is not encrypted" << std::endl;
2974   - }
2975   - else
2976   - {
2977   - std::cout << "R = " << R << std::endl;
2978   - std::cout << "P = " << P << std::endl;
2979   - std::string user_password = pdf.getTrimmedUserPassword();
2980   - std::string encryption_key = pdf.getEncryptionKey();
2981   - std::cout << "User password = " << user_password << std::endl;
2982   - if (o.show_encryption_key)
2983   - {
2984   - std::cout << "Encryption key = "
2985   - << QUtil::hex_encode(encryption_key) << std::endl;
2986   - }
2987   - if (pdf.ownerPasswordMatched())
2988   - {
2989   - std::cout << "Supplied password is owner password" << std::endl;
2990   - }
2991   - if (pdf.userPasswordMatched())
2992   - {
2993   - std::cout << "Supplied password is user password" << std::endl;
2994   - }
2995   - std::cout << "extract for accessibility: "
2996   - << show_bool(pdf.allowAccessibility()) << std::endl
2997   - << "extract for any purpose: "
2998   - << show_bool(pdf.allowExtractAll()) << std::endl
2999   - << "print low resolution: "
3000   - << show_bool(pdf.allowPrintLowRes()) << std::endl
3001   - << "print high resolution: "
3002   - << show_bool(pdf.allowPrintHighRes()) << std::endl
3003   - << "modify document assembly: "
3004   - << show_bool(pdf.allowModifyAssembly()) << std::endl
3005   - << "modify forms: "
3006   - << show_bool(pdf.allowModifyForm()) << std::endl
3007   - << "modify annotations: "
3008   - << show_bool(pdf.allowModifyAnnotation()) << std::endl
3009   - << "modify other: "
3010   - << show_bool(pdf.allowModifyOther()) << std::endl
3011   - << "modify anything: "
3012   - << show_bool(pdf.allowModifyAll()) << std::endl;
3013   - if (V >= 4)
3014   - {
3015   - std::cout << "stream encryption method: "
3016   - << show_encryption_method(stream_method) << std::endl
3017   - << "string encryption method: "
3018   - << show_encryption_method(string_method) << std::endl
3019   - << "file encryption method: "
3020   - << show_encryption_method(file_method) << std::endl;
3021   - }
3022   - }
3023   -}
3024   -
3025 2216 std::vector<int>
3026 2217 ArgParser::parseNumrange(char const* range, int max, bool throw_error)
3027 2218 {
... ... @@ -3044,54 +2235,12 @@ ArgParser::parseNumrange(char const* range, int max, bool throw_error)
3044 2235 }
3045 2236  
3046 2237 void
3047   -ArgParser::parseUnderOverlayOptions(UnderOverlay* uo)
  2238 +ArgParser::parseUnderOverlayOptions(QPDFJob::UnderOverlay* uo)
3048 2239 {
3049 2240 o.under_overlay = uo;
3050 2241 this->ap.selectOptionTable(O_UNDER_OVERLAY);
3051 2242 }
3052 2243  
3053   -QPDFPageData::QPDFPageData(std::string const& filename,
3054   - QPDF* qpdf,
3055   - char const* range) :
3056   - filename(filename),
3057   - qpdf(qpdf),
3058   - orig_pages(qpdf->getAllPages())
3059   -{
3060   - try
3061   - {
3062   - this->selected_pages =
3063   - QUtil::parse_numrange(range,
3064   - QIntC::to_int(this->orig_pages.size()));
3065   - }
3066   - catch (std::runtime_error& e)
3067   - {
3068   - usageExit("parsing numeric range for " + filename + ": " + e.what());
3069   - }
3070   -}
3071   -
3072   -QPDFPageData::QPDFPageData(QPDFPageData const& other, int page) :
3073   - filename(other.filename),
3074   - qpdf(other.qpdf),
3075   - orig_pages(other.orig_pages)
3076   -{
3077   - this->selected_pages.push_back(page);
3078   -}
3079   -
3080   -static void parse_version(std::string const& full_version_string,
3081   - std::string& version, int& extension_level)
3082   -{
3083   - PointerHolder<char> vp(true, QUtil::copy_string(full_version_string));
3084   - char* v = vp.getPointer();
3085   - char* p1 = strchr(v, '.');
3086   - char* p2 = (p1 ? strchr(1 + p1, '.') : 0);
3087   - if (p2 && *(p2 + 1))
3088   - {
3089   - *p2++ = '\0';
3090   - extension_level = QUtil::string_to_int(p2);
3091   - }
3092   - version = v;
3093   -}
3094   -
3095 2244 void
3096 2245 ArgParser::parseRotationParameter(std::string const& parameter)
3097 2246 {
... ... @@ -3150,7 +2299,7 @@ ArgParser::parseRotationParameter(std::string const& parameter)
3150 2299 {
3151 2300 angle = -angle;
3152 2301 }
3153   - o.rotations[range] = RotationSpec(angle, (relative != 0));
  2302 + o.rotations[range] = QPDFJob::RotationSpec(angle, (relative != 0));
3154 2303 }
3155 2304 else
3156 2305 {
... ... @@ -3255,2813 +2404,88 @@ ArgParser::doFinalChecks()
3255 2404 }
3256 2405 }
3257 2406  
3258   -static void set_qpdf_options(QPDF& pdf, Options& o)
  2407 +int realmain(int argc, char* argv[])
3259 2408 {
3260   - if (o.ignore_xref_streams)
3261   - {
3262   - pdf.setIgnoreXRefStreams(true);
3263   - }
3264   - if (o.suppress_recovery)
3265   - {
3266   - pdf.setAttemptRecovery(false);
3267   - }
3268   - if (o.password_is_hex_key)
3269   - {
3270   - pdf.setPasswordIsHexKey(true);
3271   - }
3272   - if (o.suppress_warnings)
  2409 + whoami = QUtil::getWhoami(argv[0]);
  2410 + QUtil::setLineBuf(stdout);
  2411 +
  2412 + // Remove prefix added by libtool for consistency during testing.
  2413 + if (strncmp(whoami, "lt-", 3) == 0)
3273 2414 {
3274   - pdf.setSuppressWarnings(true);
  2415 + whoami += 3;
3275 2416 }
3276   -}
3277 2417  
3278   -static void do_check(QPDF& pdf, Options& o, int& exit_code)
3279   -{
3280   - // Code below may set okay to false but not to true.
3281   - // We assume okay until we prove otherwise but may
3282   - // continue to perform additional checks after finding
3283   - // errors.
3284   - bool okay = true;
3285   - bool warnings = false;
3286   - std::cout << "checking " << o.infilename << std::endl;
  2418 + // ArgParser must stay in scope for the duration of qpdf's run as
  2419 + // it holds dynamic memory used for argv.
  2420 + QPDFJob j;
  2421 + ArgParser ap(argc, argv, j);
  2422 +
  2423 + bool errors = false;
3287 2424 try
3288 2425 {
3289   - int extension_level = pdf.getExtensionLevel();
3290   - std::cout << "PDF Version: " << pdf.getPDFVersion();
3291   - if (extension_level > 0)
3292   - {
3293   - std::cout << " extension level "
3294   - << pdf.getExtensionLevel();
3295   - }
3296   - std::cout << std::endl;
3297   - show_encryption(pdf, o);
3298   - if (pdf.isLinearized())
3299   - {
3300   - std::cout << "File is linearized\n";
3301   - // any errors or warnings are reported by
3302   - // checkLinearization(). We treat all issues reported here
3303   - // as warnings.
3304   - if (! pdf.checkLinearization())
3305   - {
3306   - warnings = true;
3307   - }
3308   - }
3309   - else
3310   - {
3311   - std::cout << "File is not linearized\n";
3312   - }
3313   -
3314   - // Write the file no nowhere, uncompressing
3315   - // streams. This causes full file traversal and
3316   - // decoding of all streams we can decode.
3317   - QPDFWriter w(pdf);
3318   - Pl_Discard discard;
3319   - w.setOutputPipeline(&discard);
3320   - w.setDecodeLevel(qpdf_dl_all);
3321   - w.write();
3322   -
3323   - // Parse all content streams
3324   - QPDFPageDocumentHelper dh(pdf);
3325   - std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
3326   - DiscardContents discard_contents;
3327   - int pageno = 0;
3328   - for (std::vector<QPDFPageObjectHelper>::iterator iter =
3329   - pages.begin();
3330   - iter != pages.end(); ++iter)
3331   - {
3332   - QPDFPageObjectHelper& page(*iter);
3333   - ++pageno;
3334   - try
3335   - {
3336   - page.parseContents(&discard_contents);
3337   - }
3338   - catch (QPDFExc& e)
3339   - {
3340   - okay = false;
3341   - std::cerr << "ERROR: page " << pageno << ": "
3342   - << e.what() << std::endl;
3343   - }
3344   - }
  2426 + ap.parseOptions();
  2427 + j.run();
3345 2428 }
3346 2429 catch (std::exception& e)
3347 2430 {
3348   - std::cerr << "ERROR: " << e.what() << std::endl;
3349   - okay = false;
3350   - }
3351   - if (okay)
3352   - {
3353   - if ((! pdf.getWarnings().empty()) || warnings)
3354   - {
3355   - exit_code = EXIT_WARNING;
3356   - }
3357   - else
3358   - {
3359   - std::cout << "No syntax or stream encoding errors"
3360   - << " found; the file may still contain"
3361   - << std::endl
3362   - << "errors that qpdf cannot detect"
3363   - << std::endl;
3364   - }
3365   - }
3366   - else
3367   - {
3368   - exit_code = EXIT_ERROR;
3369   - }
3370   -}
3371   -
3372   -static void do_show_obj(QPDF& pdf, Options& o, int& exit_code)
3373   -{
3374   - QPDFObjectHandle obj;
3375   - if (o.show_trailer)
3376   - {
3377   - obj = pdf.getTrailer();
3378   - }
3379   - else
3380   - {
3381   - obj = pdf.getObjectByID(o.show_obj, o.show_gen);
3382   - }
3383   - if (obj.isStream())
3384   - {
3385   - if (o.show_raw_stream_data || o.show_filtered_stream_data)
3386   - {
3387   - bool filter = o.show_filtered_stream_data;
3388   - if (filter &&
3389   - (! obj.pipeStreamData(0, 0, qpdf_dl_all)))
3390   - {
3391   - QTC::TC("qpdf", "qpdf unable to filter");
3392   - std::cerr << "Unable to filter stream data."
3393   - << std::endl;
3394   - exit_code = EXIT_ERROR;
3395   - }
3396   - else
3397   - {
3398   - QUtil::binary_stdout();
3399   - Pl_StdioFile out("stdout", stdout);
3400   - obj.pipeStreamData(
3401   - &out,
3402   - (filter && o.normalize) ? qpdf_ef_normalize : 0,
3403   - filter ? qpdf_dl_all : qpdf_dl_none);
3404   - }
3405   - }
3406   - else
3407   - {
3408   - std::cout
3409   - << "Object is stream. Dictionary:" << std::endl
3410   - << obj.getDict().unparseResolved() << std::endl;
3411   - }
3412   - }
3413   - else
3414   - {
3415   - std::cout << obj.unparseResolved() << std::endl;
  2431 + std::cerr << whoami << ": " << e.what() << std::endl;
  2432 + errors = true;
3416 2433 }
3417   -}
3418   -
3419   -static void do_show_pages(QPDF& pdf, Options& o)
3420   -{
3421   - QPDFPageDocumentHelper dh(pdf);
3422   - std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
3423   - int pageno = 0;
3424   - for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
3425   - iter != pages.end(); ++iter)
3426   - {
3427   - QPDFPageObjectHelper& ph(*iter);
3428   - QPDFObjectHandle page = ph.getObjectHandle();
3429   - ++pageno;
3430   -
3431   - std::cout << "page " << pageno << ": "
3432   - << page.getObjectID() << " "
3433   - << page.getGeneration() << " R" << std::endl;
3434   - if (o.show_page_images)
3435   - {
3436   - std::map<std::string, QPDFObjectHandle> images = ph.getImages();
3437   - if (! images.empty())
3438   - {
3439   - std::cout << " images:" << std::endl;
3440   - for (auto const& iter2: images)
3441   - {
3442   - std::string const& name = iter2.first;
3443   - QPDFObjectHandle image = iter2.second;
3444   - QPDFObjectHandle dict = image.getDict();
3445   - int width =
3446   - dict.getKey("/Width").getIntValueAsInt();
3447   - int height =
3448   - dict.getKey("/Height").getIntValueAsInt();
3449   - std::cout << " " << name << ": "
3450   - << image.unparse()
3451   - << ", " << width << " x " << height
3452   - << std::endl;
3453   - }
3454   - }
3455   - }
3456 2434  
3457   - std::cout << " content:" << std::endl;
3458   - std::vector<QPDFObjectHandle> content =
3459   - ph.getPageContents();
3460   - for (auto& iter2: content)
3461   - {
3462   - std::cout << " " << iter2.unparse() << std::endl;
3463   - }
3464   - }
3465   -}
  2435 + // QXXXQ
  2436 + bool warnings = j.hasWarnings();
3466 2437  
3467   -static void do_list_attachments(QPDF& pdf, Options& o)
3468   -{
3469   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
3470   - if (efdh.hasEmbeddedFiles())
  2438 + if (warnings)
3471 2439 {
3472   - for (auto const& i: efdh.getEmbeddedFiles())
  2440 + if (! j.suppressWarnings())
3473 2441 {
3474   - std::string const& key = i.first;
3475   - auto efoh = i.second;
3476   - std::cout << key << " -> "
3477   - << efoh->getEmbeddedFileStream().getObjGen()
  2442 + std::cerr << whoami << ": operation succeeded with warnings;"
  2443 + << " resulting file may have some problems"
3478 2444 << std::endl;
3479   - if (o.verbose)
3480   - {
3481   - auto desc = efoh->getDescription();
3482   - if (! desc.empty())
3483   - {
3484   - std::cout << " description: " << desc << std::endl;
3485   - }
3486   - std::cout << " preferred name: " << efoh->getFilename()
3487   - << std::endl;
3488   - std::cout << " all names:" << std::endl;
3489   - for (auto const& i2: efoh->getFilenames())
3490   - {
3491   - std::cout << " " << i2.first << " -> " << i2.second
3492   - << std::endl;
3493   - }
3494   - std::cout << " all data streams:" << std::endl;
3495   - for (auto i2: efoh->getEmbeddedFileStreams().ditems())
3496   - {
3497   - std::cout << " " << i2.first << " -> "
3498   - << i2.second.getObjGen()
3499   - << std::endl;
3500   - }
3501   - }
3502   - }
3503   - }
3504   - else
3505   - {
3506   - std::cout << o.infilename << " has no embedded files" << std::endl;
3507   - }
3508   -}
3509   -
3510   -static void do_show_attachment(QPDF& pdf, Options& o, int& exit_code)
3511   -{
3512   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
3513   - auto fs = efdh.getEmbeddedFile(o.attachment_to_show);
3514   - if (! fs)
3515   - {
3516   - std::cerr << whoami << ": attachment " << o.attachment_to_show
3517   - << " not found" << std::endl;
3518   - exit_code = EXIT_ERROR;
3519   - return;
3520   - }
3521   - auto efs = fs->getEmbeddedFileStream();
3522   - QUtil::binary_stdout();
3523   - Pl_StdioFile out("stdout", stdout);
3524   - efs.pipeStreamData(&out, 0, qpdf_dl_all);
3525   -}
3526   -
3527   -static std::set<QPDFObjGen>
3528   -get_wanted_json_objects(Options& o)
3529   -{
3530   - std::set<QPDFObjGen> wanted_og;
3531   - for (auto const& iter: o.json_objects)
3532   - {
3533   - bool trailer;
3534   - int obj = 0;
3535   - int gen = 0;
3536   - parse_object_id(iter, trailer, obj, gen);
3537   - if (obj)
3538   - {
3539   - wanted_og.insert(QPDFObjGen(obj, gen));
3540 2445 }
  2446 + // Still return with warning code even if warnings were
  2447 + // suppressed, so leave warnings == true.
3541 2448 }
3542   - return wanted_og;
3543   -}
3544 2449  
3545   -static void do_json_objects(QPDF& pdf, Options& o, JSON& j)
3546   -{
3547   - // Add all objects. Do this first before other code below modifies
3548   - // things by doing stuff like calling
3549   - // pushInheritedAttributesToPage.
3550   - bool all_objects = o.json_objects.empty();
3551   - std::set<QPDFObjGen> wanted_og = get_wanted_json_objects(o);
3552   - JSON j_objects = j.addDictionaryMember("objects", JSON::makeDictionary());
3553   - if (all_objects || o.json_objects.count("trailer"))
  2450 + unsigned long encryption_status = j.getEncryptionStatus();
  2451 + if (j.checkIsEncrypted())
3554 2452 {
3555   - j_objects.addDictionaryMember(
3556   - "trailer", pdf.getTrailer().getJSON(true));
3557   - }
3558   - std::vector<QPDFObjectHandle> objects = pdf.getAllObjects();
3559   - for (std::vector<QPDFObjectHandle>::iterator iter = objects.begin();
3560   - iter != objects.end(); ++iter)
3561   - {
3562   - if (all_objects || wanted_og.count((*iter).getObjGen()))
  2453 + if (encryption_status & qpdf_es_encrypted)
3563 2454 {
3564   - j_objects.addDictionaryMember(
3565   - (*iter).unparse(), (*iter).getJSON(true));
  2455 + QTC::TC("qpdf", "qpdf check encrypted encrypted");
  2456 + return 0;
3566 2457 }
3567   - }
3568   -}
3569   -
3570   -static void do_json_objectinfo(QPDF& pdf, Options& o, JSON& j)
3571   -{
3572   - // Do this first before other code below modifies things by doing
3573   - // stuff like calling pushInheritedAttributesToPage.
3574   - bool all_objects = o.json_objects.empty();
3575   - std::set<QPDFObjGen> wanted_og = get_wanted_json_objects(o);
3576   - JSON j_objectinfo = j.addDictionaryMember(
3577   - "objectinfo", JSON::makeDictionary());
3578   - for (auto& obj: pdf.getAllObjects())
3579   - {
3580   - if (all_objects || wanted_og.count(obj.getObjGen()))
  2458 + else
3581 2459 {
3582   - auto j_details = j_objectinfo.addDictionaryMember(
3583   - obj.unparse(), JSON::makeDictionary());
3584   - auto j_stream = j_details.addDictionaryMember(
3585   - "stream", JSON::makeDictionary());
3586   - bool is_stream = obj.isStream();
3587   - j_stream.addDictionaryMember(
3588   - "is", JSON::makeBool(is_stream));
3589   - j_stream.addDictionaryMember(
3590   - "length",
3591   - (is_stream
3592   - ? obj.getDict().getKey("/Length").getJSON(true)
3593   - : JSON::makeNull()));
3594   - j_stream.addDictionaryMember(
3595   - "filter",
3596   - (is_stream
3597   - ? obj.getDict().getKey("/Filter").getJSON(true)
3598   - : JSON::makeNull()));
  2460 + QTC::TC("qpdf", "qpdf check encrypted not encrypted");
  2461 + return EXIT_IS_NOT_ENCRYPTED;
3599 2462 }
3600 2463 }
3601   -}
3602   -
3603   -static void do_json_pages(QPDF& pdf, Options& o, JSON& j)
3604   -{
3605   - JSON j_pages = j.addDictionaryMember("pages", JSON::makeArray());
3606   - QPDFPageDocumentHelper pdh(pdf);
3607   - QPDFPageLabelDocumentHelper pldh(pdf);
3608   - QPDFOutlineDocumentHelper odh(pdf);
3609   - pdh.pushInheritedAttributesToPage();
3610   - std::vector<QPDFPageObjectHelper> pages = pdh.getAllPages();
3611   - int pageno = 0;
3612   - for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
3613   - iter != pages.end(); ++iter, ++pageno)
  2464 + else if (j.checkRequiresPassword())
3614 2465 {
3615   - JSON j_page = j_pages.addArrayElement(JSON::makeDictionary());
3616   - QPDFPageObjectHelper& ph(*iter);
3617   - QPDFObjectHandle page = ph.getObjectHandle();
3618   - j_page.addDictionaryMember("object", page.getJSON());
3619   - JSON j_images = j_page.addDictionaryMember(
3620   - "images", JSON::makeArray());
3621   - std::map<std::string, QPDFObjectHandle> images = ph.getImages();
3622   - for (auto const& iter2: images)
  2466 + if (encryption_status & qpdf_es_encrypted)
3623 2467 {
3624   - JSON j_image = j_images.addArrayElement(JSON::makeDictionary());
3625   - j_image.addDictionaryMember(
3626   - "name", JSON::makeString(iter2.first));
3627   - QPDFObjectHandle image = iter2.second;
3628   - QPDFObjectHandle dict = image.getDict();
3629   - j_image.addDictionaryMember("object", image.getJSON());
3630   - j_image.addDictionaryMember(
3631   - "width", dict.getKey("/Width").getJSON());
3632   - j_image.addDictionaryMember(
3633   - "height", dict.getKey("/Height").getJSON());
3634   - j_image.addDictionaryMember(
3635   - "colorspace", dict.getKey("/ColorSpace").getJSON());
3636   - j_image.addDictionaryMember(
3637   - "bitspercomponent", dict.getKey("/BitsPerComponent").getJSON());
3638   - QPDFObjectHandle filters = dict.getKey("/Filter").wrapInArray();
3639   - j_image.addDictionaryMember(
3640   - "filter", filters.getJSON());
3641   - QPDFObjectHandle decode_parms = dict.getKey("/DecodeParms");
3642   - QPDFObjectHandle dp_array;
3643   - if (decode_parms.isArray())
  2468 + if (encryption_status & qpdf_es_password_incorrect)
3644 2469 {
3645   - dp_array = decode_parms;
  2470 + QTC::TC("qpdf", "qpdf check password password incorrect");
  2471 + return 0;
3646 2472 }
3647 2473 else
3648 2474 {
3649   - dp_array = QPDFObjectHandle::newArray();
3650   - for (int i = 0; i < filters.getArrayNItems(); ++i)
3651   - {
3652   - dp_array.appendItem(decode_parms);
3653   - }
  2475 + QTC::TC("qpdf", "qpdf check password password correct");
  2476 + return EXIT_CORRECT_PASSWORD;
3654 2477 }
3655   - j_image.addDictionaryMember("decodeparms", dp_array.getJSON());
3656   - j_image.addDictionaryMember(
3657   - "filterable",
3658   - JSON::makeBool(
3659   - image.pipeStreamData(0, 0, o.decode_level, true)));
3660   - }
3661   - j_page.addDictionaryMember("images", j_images);
3662   - JSON j_contents = j_page.addDictionaryMember(
3663   - "contents", JSON::makeArray());
3664   - std::vector<QPDFObjectHandle> content = ph.getPageContents();
3665   - for (auto& iter2: content)
3666   - {
3667   - j_contents.addArrayElement(iter2.getJSON());
3668 2478 }
3669   - j_page.addDictionaryMember(
3670   - "label", pldh.getLabelForPage(pageno).getJSON());
3671   - JSON j_outlines = j_page.addDictionaryMember(
3672   - "outlines", JSON::makeArray());
3673   - std::vector<QPDFOutlineObjectHelper> outlines =
3674   - odh.getOutlinesForPage(page.getObjGen());
3675   - for (std::vector<QPDFOutlineObjectHelper>::iterator oiter =
3676   - outlines.begin();
3677   - oiter != outlines.end(); ++oiter)
  2479 + else
3678 2480 {
3679   - JSON j_outline = j_outlines.addArrayElement(JSON::makeDictionary());
3680   - j_outline.addDictionaryMember(
3681   - "object", (*oiter).getObjectHandle().getJSON());
3682   - j_outline.addDictionaryMember(
3683   - "title", JSON::makeString((*oiter).getTitle()));
3684   - j_outline.addDictionaryMember(
3685   - "dest", (*oiter).getDest().getJSON(true));
  2481 + QTC::TC("qpdf", "qpdf check password not encrypted");
  2482 + return EXIT_IS_NOT_ENCRYPTED;
3686 2483 }
3687   - j_page.addDictionaryMember("pageposfrom1", JSON::makeInt(1 + pageno));
3688   - }
3689   -}
3690   -
3691   -static void do_json_page_labels(QPDF& pdf, Options& o, JSON& j)
3692   -{
3693   - JSON j_labels = j.addDictionaryMember("pagelabels", JSON::makeArray());
3694   - QPDFPageLabelDocumentHelper pldh(pdf);
3695   - QPDFPageDocumentHelper pdh(pdf);
3696   - std::vector<QPDFPageObjectHelper> pages = pdh.getAllPages();
3697   - if (pldh.hasPageLabels())
3698   - {
3699   - std::vector<QPDFObjectHandle> labels;
3700   - pldh.getLabelsForPageRange(
3701   - 0, QIntC::to_int(pages.size()) - 1, 0, labels);
3702   - for (std::vector<QPDFObjectHandle>::iterator iter = labels.begin();
3703   - iter != labels.end(); ++iter)
3704   - {
3705   - std::vector<QPDFObjectHandle>::iterator next = iter;
3706   - ++next;
3707   - if (next == labels.end())
3708   - {
3709   - // This can't happen, so ignore it. This could only
3710   - // happen if getLabelsForPageRange somehow returned an
3711   - // odd number of items.
3712   - break;
3713   - }
3714   - JSON j_label = j_labels.addArrayElement(JSON::makeDictionary());
3715   - j_label.addDictionaryMember("index", (*iter).getJSON());
3716   - ++iter;
3717   - j_label.addDictionaryMember("label", (*iter).getJSON());
3718   - }
3719   - }
3720   -}
3721   -
3722   -static void add_outlines_to_json(
3723   - std::vector<QPDFOutlineObjectHelper> outlines, JSON& j,
3724   - std::map<QPDFObjGen, int>& page_numbers)
3725   -{
3726   - for (std::vector<QPDFOutlineObjectHelper>::iterator iter = outlines.begin();
3727   - iter != outlines.end(); ++iter)
3728   - {
3729   - QPDFOutlineObjectHelper& ol = *iter;
3730   - JSON jo = j.addArrayElement(JSON::makeDictionary());
3731   - jo.addDictionaryMember("object", ol.getObjectHandle().getJSON());
3732   - jo.addDictionaryMember("title", JSON::makeString(ol.getTitle()));
3733   - jo.addDictionaryMember("dest", ol.getDest().getJSON(true));
3734   - jo.addDictionaryMember("open", JSON::makeBool(ol.getCount() >= 0));
3735   - QPDFObjectHandle page = ol.getDestPage();
3736   - JSON j_destpage = JSON::makeNull();
3737   - if (page.isIndirect())
3738   - {
3739   - QPDFObjGen og = page.getObjGen();
3740   - if (page_numbers.count(og))
3741   - {
3742   - j_destpage = JSON::makeInt(page_numbers[og]);
3743   - }
3744   - }
3745   - jo.addDictionaryMember("destpageposfrom1", j_destpage);
3746   - JSON j_kids = jo.addDictionaryMember("kids", JSON::makeArray());
3747   - add_outlines_to_json(ol.getKids(), j_kids, page_numbers);
3748   - }
3749   -}
3750   -
3751   -static void do_json_outlines(QPDF& pdf, Options& o, JSON& j)
3752   -{
3753   - std::map<QPDFObjGen, int> page_numbers;
3754   - QPDFPageDocumentHelper dh(pdf);
3755   - std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
3756   - int n = 0;
3757   - for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
3758   - iter != pages.end(); ++iter)
3759   - {
3760   - QPDFObjectHandle oh = (*iter).getObjectHandle();
3761   - page_numbers[oh.getObjGen()] = ++n;
3762   - }
3763   -
3764   - JSON j_outlines = j.addDictionaryMember(
3765   - "outlines", JSON::makeArray());
3766   - QPDFOutlineDocumentHelper odh(pdf);
3767   - add_outlines_to_json(odh.getTopLevelOutlines(), j_outlines, page_numbers);
3768   -}
3769   -
3770   -static void do_json_acroform(QPDF& pdf, Options& o, JSON& j)
3771   -{
3772   - JSON j_acroform = j.addDictionaryMember(
3773   - "acroform", JSON::makeDictionary());
3774   - QPDFAcroFormDocumentHelper afdh(pdf);
3775   - j_acroform.addDictionaryMember(
3776   - "hasacroform",
3777   - JSON::makeBool(afdh.hasAcroForm()));
3778   - j_acroform.addDictionaryMember(
3779   - "needappearances",
3780   - JSON::makeBool(afdh.getNeedAppearances()));
3781   - JSON j_fields = j_acroform.addDictionaryMember(
3782   - "fields", JSON::makeArray());
3783   - QPDFPageDocumentHelper pdh(pdf);
3784   - std::vector<QPDFPageObjectHelper> pages = pdh.getAllPages();
3785   - int pagepos1 = 0;
3786   - for (std::vector<QPDFPageObjectHelper>::iterator page_iter =
3787   - pages.begin();
3788   - page_iter != pages.end(); ++page_iter)
3789   - {
3790   - ++pagepos1;
3791   - std::vector<QPDFAnnotationObjectHelper> annotations =
3792   - afdh.getWidgetAnnotationsForPage(*page_iter);
3793   - for (std::vector<QPDFAnnotationObjectHelper>::iterator annot_iter =
3794   - annotations.begin();
3795   - annot_iter != annotations.end(); ++annot_iter)
3796   - {
3797   - QPDFAnnotationObjectHelper& aoh = *annot_iter;
3798   - QPDFFormFieldObjectHelper ffh =
3799   - afdh.getFieldForAnnotation(aoh);
3800   - JSON j_field = j_fields.addArrayElement(
3801   - JSON::makeDictionary());
3802   - j_field.addDictionaryMember(
3803   - "object",
3804   - ffh.getObjectHandle().getJSON());
3805   - j_field.addDictionaryMember(
3806   - "parent",
3807   - ffh.getObjectHandle().getKey("/Parent").getJSON());
3808   - j_field.addDictionaryMember(
3809   - "pageposfrom1",
3810   - JSON::makeInt(pagepos1));
3811   - j_field.addDictionaryMember(
3812   - "fieldtype",
3813   - JSON::makeString(ffh.getFieldType()));
3814   - j_field.addDictionaryMember(
3815   - "fieldflags",
3816   - JSON::makeInt(ffh.getFlags()));
3817   - j_field.addDictionaryMember(
3818   - "fullname",
3819   - JSON::makeString(ffh.getFullyQualifiedName()));
3820   - j_field.addDictionaryMember(
3821   - "partialname",
3822   - JSON::makeString(ffh.getPartialName()));
3823   - j_field.addDictionaryMember(
3824   - "alternativename",
3825   - JSON::makeString(ffh.getAlternativeName()));
3826   - j_field.addDictionaryMember(
3827   - "mappingname",
3828   - JSON::makeString(ffh.getMappingName()));
3829   - j_field.addDictionaryMember(
3830   - "value",
3831   - ffh.getValue().getJSON());
3832   - j_field.addDictionaryMember(
3833   - "defaultvalue",
3834   - ffh.getDefaultValue().getJSON());
3835   - j_field.addDictionaryMember(
3836   - "quadding",
3837   - JSON::makeInt(ffh.getQuadding()));
3838   - j_field.addDictionaryMember(
3839   - "ischeckbox",
3840   - JSON::makeBool(ffh.isCheckbox()));
3841   - j_field.addDictionaryMember(
3842   - "isradiobutton",
3843   - JSON::makeBool(ffh.isRadioButton()));
3844   - j_field.addDictionaryMember(
3845   - "ischoice",
3846   - JSON::makeBool(ffh.isChoice()));
3847   - j_field.addDictionaryMember(
3848   - "istext",
3849   - JSON::makeBool(ffh.isText()));
3850   - JSON j_choices = j_field.addDictionaryMember(
3851   - "choices", JSON::makeArray());
3852   - std::vector<std::string> choices = ffh.getChoices();
3853   - for (std::vector<std::string>::iterator iter = choices.begin();
3854   - iter != choices.end(); ++iter)
3855   - {
3856   - j_choices.addArrayElement(JSON::makeString(*iter));
3857   - }
3858   - JSON j_annot = j_field.addDictionaryMember(
3859   - "annotation", JSON::makeDictionary());
3860   - j_annot.addDictionaryMember(
3861   - "object",
3862   - aoh.getObjectHandle().getJSON());
3863   - j_annot.addDictionaryMember(
3864   - "appearancestate",
3865   - JSON::makeString(aoh.getAppearanceState()));
3866   - j_annot.addDictionaryMember(
3867   - "annotationflags",
3868   - JSON::makeInt(aoh.getFlags()));
3869   - }
3870   - }
3871   -}
3872   -
3873   -static void do_json_encrypt(QPDF& pdf, Options& o, JSON& j)
3874   -{
3875   - int R = 0;
3876   - int P = 0;
3877   - int V = 0;
3878   - QPDF::encryption_method_e stream_method = QPDF::e_none;
3879   - QPDF::encryption_method_e string_method = QPDF::e_none;
3880   - QPDF::encryption_method_e file_method = QPDF::e_none;
3881   - bool is_encrypted = pdf.isEncrypted(
3882   - R, P, V, stream_method, string_method, file_method);
3883   - JSON j_encrypt = j.addDictionaryMember(
3884   - "encrypt", JSON::makeDictionary());
3885   - j_encrypt.addDictionaryMember(
3886   - "encrypted",
3887   - JSON::makeBool(is_encrypted));
3888   - j_encrypt.addDictionaryMember(
3889   - "userpasswordmatched",
3890   - JSON::makeBool(is_encrypted && pdf.userPasswordMatched()));
3891   - j_encrypt.addDictionaryMember(
3892   - "ownerpasswordmatched",
3893   - JSON::makeBool(is_encrypted && pdf.ownerPasswordMatched()));
3894   - JSON j_capabilities = j_encrypt.addDictionaryMember(
3895   - "capabilities", JSON::makeDictionary());
3896   - j_capabilities.addDictionaryMember(
3897   - "accessibility",
3898   - JSON::makeBool(pdf.allowAccessibility()));
3899   - j_capabilities.addDictionaryMember(
3900   - "extract",
3901   - JSON::makeBool(pdf.allowExtractAll()));
3902   - j_capabilities.addDictionaryMember(
3903   - "printlow",
3904   - JSON::makeBool(pdf.allowPrintLowRes()));
3905   - j_capabilities.addDictionaryMember(
3906   - "printhigh",
3907   - JSON::makeBool(pdf.allowPrintHighRes()));
3908   - j_capabilities.addDictionaryMember(
3909   - "modifyassembly",
3910   - JSON::makeBool(pdf.allowModifyAssembly()));
3911   - j_capabilities.addDictionaryMember(
3912   - "modifyforms",
3913   - JSON::makeBool(pdf.allowModifyForm()));
3914   - j_capabilities.addDictionaryMember(
3915   - "moddifyannotations",
3916   - JSON::makeBool(pdf.allowModifyAnnotation()));
3917   - j_capabilities.addDictionaryMember(
3918   - "modifyother",
3919   - JSON::makeBool(pdf.allowModifyOther()));
3920   - j_capabilities.addDictionaryMember(
3921   - "modify",
3922   - JSON::makeBool(pdf.allowModifyAll()));
3923   - JSON j_parameters = j_encrypt.addDictionaryMember(
3924   - "parameters", JSON::makeDictionary());
3925   - j_parameters.addDictionaryMember("R", JSON::makeInt(R));
3926   - j_parameters.addDictionaryMember("V", JSON::makeInt(V));
3927   - j_parameters.addDictionaryMember("P", JSON::makeInt(P));
3928   - int bits = 0;
3929   - JSON key = JSON::makeNull();
3930   - if (is_encrypted)
3931   - {
3932   - std::string encryption_key = pdf.getEncryptionKey();
3933   - bits = QIntC::to_int(encryption_key.length() * 8);
3934   - if (o.show_encryption_key)
3935   - {
3936   - key = JSON::makeString(QUtil::hex_encode(encryption_key));
3937   - }
3938   - }
3939   - j_parameters.addDictionaryMember("bits", JSON::makeInt(bits));
3940   - j_parameters.addDictionaryMember("key", key);
3941   - auto fix_method = [is_encrypted](QPDF::encryption_method_e& m) {
3942   - if (is_encrypted && m == QPDF::e_none)
3943   - {
3944   - m = QPDF::e_rc4;
3945   - }
3946   - };
3947   - fix_method(stream_method);
3948   - fix_method(string_method);
3949   - fix_method(file_method);
3950   - std::string s_stream_method = show_encryption_method(stream_method);
3951   - std::string s_string_method = show_encryption_method(string_method);
3952   - std::string s_file_method = show_encryption_method(file_method);
3953   - std::string s_overall_method;
3954   - if ((stream_method == string_method) &&
3955   - (stream_method == file_method))
3956   - {
3957   - s_overall_method = s_stream_method;
3958   - }
3959   - else
3960   - {
3961   - s_overall_method = "mixed";
3962   - }
3963   - j_parameters.addDictionaryMember(
3964   - "method", JSON::makeString(s_overall_method));
3965   - j_parameters.addDictionaryMember(
3966   - "streammethod", JSON::makeString(s_stream_method));
3967   - j_parameters.addDictionaryMember(
3968   - "stringmethod", JSON::makeString(s_string_method));
3969   - j_parameters.addDictionaryMember(
3970   - "filemethod", JSON::makeString(s_file_method));
3971   -}
3972   -
3973   -static void do_json_attachments(QPDF& pdf, Options& o, JSON& j)
3974   -{
3975   - JSON j_attachments = j.addDictionaryMember(
3976   - "attachments", JSON::makeDictionary());
3977   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
3978   - for (auto const& iter: efdh.getEmbeddedFiles())
3979   - {
3980   - std::string const& key = iter.first;
3981   - auto fsoh = iter.second;
3982   - auto j_details = j_attachments.addDictionaryMember(
3983   - key, JSON::makeDictionary());
3984   - j_details.addDictionaryMember(
3985   - "filespec",
3986   - JSON::makeString(fsoh->getObjectHandle().unparse()));
3987   - j_details.addDictionaryMember(
3988   - "preferredname", JSON::makeString(fsoh->getFilename()));
3989   - j_details.addDictionaryMember(
3990   - "preferredcontents",
3991   - JSON::makeString(fsoh->getEmbeddedFileStream().unparse()));
3992   - }
3993   -}
3994   -
3995   -static void do_json(QPDF& pdf, Options& o)
3996   -{
3997   - JSON j = JSON::makeDictionary();
3998   - // This version is updated every time a non-backward-compatible
3999   - // change is made to the JSON format. Clients of the JSON are to
4000   - // ignore unrecognized keys, so we only update the version of a
4001   - // key disappears or if its value changes meaning.
4002   - j.addDictionaryMember("version", JSON::makeInt(1));
4003   - JSON j_params = j.addDictionaryMember(
4004   - "parameters", JSON::makeDictionary());
4005   - std::string decode_level_str;
4006   - switch (o.decode_level)
4007   - {
4008   - case qpdf_dl_none:
4009   - decode_level_str = "none";
4010   - break;
4011   - case qpdf_dl_generalized:
4012   - decode_level_str = "generalized";
4013   - break;
4014   - case qpdf_dl_specialized:
4015   - decode_level_str = "specialized";
4016   - break;
4017   - case qpdf_dl_all:
4018   - decode_level_str = "all";
4019   - break;
4020   - }
4021   - j_params.addDictionaryMember(
4022   - "decodelevel", JSON::makeString(decode_level_str));
4023   -
4024   - bool all_keys = o.json_keys.empty();
4025   - // The list of selectable top-level keys id duplicated in three
4026   - // places: json_schema, do_json, and initOptionTable.
4027   - if (all_keys || o.json_keys.count("objects"))
4028   - {
4029   - do_json_objects(pdf, o, j);
4030   - }
4031   - if (all_keys || o.json_keys.count("objectinfo"))
4032   - {
4033   - do_json_objectinfo(pdf, o, j);
4034   - }
4035   - if (all_keys || o.json_keys.count("pages"))
4036   - {
4037   - do_json_pages(pdf, o, j);
4038   - }
4039   - if (all_keys || o.json_keys.count("pagelabels"))
4040   - {
4041   - do_json_page_labels(pdf, o, j);
4042   - }
4043   - if (all_keys || o.json_keys.count("outlines"))
4044   - {
4045   - do_json_outlines(pdf, o, j);
4046   - }
4047   - if (all_keys || o.json_keys.count("acroform"))
4048   - {
4049   - do_json_acroform(pdf, o, j);
4050   - }
4051   - if (all_keys || o.json_keys.count("encrypt"))
4052   - {
4053   - do_json_encrypt(pdf, o, j);
4054   - }
4055   - if (all_keys || o.json_keys.count("attachments"))
4056   - {
4057   - do_json_attachments(pdf, o, j);
4058   - }
4059   -
4060   - // Check against schema
4061   -
4062   - JSON schema = json_schema(&o.json_keys);
4063   - std::list<std::string> errors;
4064   - if (! j.checkSchema(schema, errors))
4065   - {
4066   - std::cerr
4067   - << whoami << " didn't create JSON that complies with its own\n\
4068   -rules. Please report this as a bug at\n\
4069   - https://github.com/qpdf/qpdf/issues/new\n\
4070   -ideally with the file that caused the error and the output below. Thanks!\n\
4071   -\n";
4072   - for (std::list<std::string>::iterator iter = errors.begin();
4073   - iter != errors.end(); ++iter)
4074   - {
4075   - std::cerr << (*iter) << std::endl;
4076   - }
4077   - }
4078   -
4079   - std::cout << j.unparse() << std::endl;
4080   -}
4081   -
4082   -static void do_inspection(QPDF& pdf, Options& o)
4083   -{
4084   - int exit_code = 0;
4085   - if (o.check)
4086   - {
4087   - do_check(pdf, o, exit_code);
4088   - }
4089   - if (o.json)
4090   - {
4091   - do_json(pdf, o);
4092   - }
4093   - if (o.show_npages)
4094   - {
4095   - QTC::TC("qpdf", "qpdf npages");
4096   - std::cout << pdf.getRoot().getKey("/Pages").
4097   - getKey("/Count").getIntValue() << std::endl;
4098   - }
4099   - if (o.show_encryption)
4100   - {
4101   - show_encryption(pdf, o);
4102   - }
4103   - if (o.check_linearization)
4104   - {
4105   - if (pdf.checkLinearization())
4106   - {
4107   - std::cout << o.infilename << ": no linearization errors"
4108   - << std::endl;
4109   - }
4110   - else if (exit_code != EXIT_ERROR)
4111   - {
4112   - exit_code = EXIT_WARNING;
4113   - }
4114   - }
4115   - if (o.show_linearization)
4116   - {
4117   - if (pdf.isLinearized())
4118   - {
4119   - pdf.showLinearizationData();
4120   - }
4121   - else
4122   - {
4123   - std::cout << o.infilename << " is not linearized"
4124   - << std::endl;
4125   - }
4126   - }
4127   - if (o.show_xref)
4128   - {
4129   - pdf.showXRefTable();
4130   - }
4131   - if ((o.show_obj > 0) || o.show_trailer)
4132   - {
4133   - do_show_obj(pdf, o, exit_code);
4134   - }
4135   - if (o.show_pages)
4136   - {
4137   - do_show_pages(pdf, o);
4138   - }
4139   - if (o.list_attachments)
4140   - {
4141   - do_list_attachments(pdf, o);
4142   - }
4143   - if (! o.attachment_to_show.empty())
4144   - {
4145   - do_show_attachment(pdf, o, exit_code);
4146   - }
4147   - if ((! pdf.getWarnings().empty()) && (exit_code != EXIT_ERROR))
4148   - {
4149   - std::cerr << whoami
4150   - << ": operation succeeded with warnings" << std::endl;
4151   - exit_code = EXIT_WARNING;
4152   - }
4153   - if (exit_code)
4154   - {
4155   - exit(exit_code);
4156   - }
4157   -}
4158   -
4159   -class ImageOptimizer: public QPDFObjectHandle::StreamDataProvider
4160   -{
4161   - public:
4162   - ImageOptimizer(Options& o, QPDFObjectHandle& image);
4163   - virtual ~ImageOptimizer()
4164   - {
4165   - }
4166   - virtual void provideStreamData(int objid, int generation,
4167   - Pipeline* pipeline);
4168   - PointerHolder<Pipeline> makePipeline(
4169   - std::string const& description, Pipeline* next);
4170   - bool evaluate(std::string const& description);
4171   -
4172   - private:
4173   - Options& o;
4174   - QPDFObjectHandle image;
4175   -};
4176   -
4177   -ImageOptimizer::ImageOptimizer(Options& o, QPDFObjectHandle& image) :
4178   - o(o),
4179   - image(image)
4180   -{
4181   -}
4182   -
4183   -PointerHolder<Pipeline>
4184   -ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
4185   -{
4186   - PointerHolder<Pipeline> result;
4187   - QPDFObjectHandle dict = image.getDict();
4188   - QPDFObjectHandle w_obj = dict.getKey("/Width");
4189   - QPDFObjectHandle h_obj = dict.getKey("/Height");
4190   - QPDFObjectHandle colorspace_obj = dict.getKey("/ColorSpace");
4191   - if (! (w_obj.isNumber() && h_obj.isNumber()))
4192   - {
4193   - if (o.verbose && (! description.empty()))
4194   - {
4195   - std::cout << whoami << ": " << description
4196   - << ": not optimizing because image dictionary"
4197   - << " is missing required keys" << std::endl;
4198   - }
4199   - return result;
4200   - }
4201   - QPDFObjectHandle components_obj = dict.getKey("/BitsPerComponent");
4202   - if (! (components_obj.isInteger() && (components_obj.getIntValue() == 8)))
4203   - {
4204   - QTC::TC("qpdf", "qpdf image optimize bits per component");
4205   - if (o.verbose && (! description.empty()))
4206   - {
4207   - std::cout << whoami << ": " << description
4208   - << ": not optimizing because image has other than"
4209   - << " 8 bits per component" << std::endl;
4210   - }
4211   - return result;
4212   - }
4213   - // Files have been seen in the wild whose width and height are
4214   - // floating point, which is goofy, but we can deal with it.
4215   - JDIMENSION w = 0;
4216   - if (w_obj.isInteger())
4217   - {
4218   - w = w_obj.getUIntValueAsUInt();
4219   - }
4220   - else
4221   - {
4222   - w = static_cast<JDIMENSION>(w_obj.getNumericValue());
4223   - }
4224   - JDIMENSION h = 0;
4225   - if (h_obj.isInteger())
4226   - {
4227   - h = h_obj.getUIntValueAsUInt();
4228   - }
4229   - else
4230   - {
4231   - h = static_cast<JDIMENSION>(h_obj.getNumericValue());
4232   - }
4233   - std::string colorspace = (colorspace_obj.isName() ?
4234   - colorspace_obj.getName() :
4235   - std::string());
4236   - int components = 0;
4237   - J_COLOR_SPACE cs = JCS_UNKNOWN;
4238   - if (colorspace == "/DeviceRGB")
4239   - {
4240   - components = 3;
4241   - cs = JCS_RGB;
4242   - }
4243   - else if (colorspace == "/DeviceGray")
4244   - {
4245   - components = 1;
4246   - cs = JCS_GRAYSCALE;
4247   - }
4248   - else if (colorspace == "/DeviceCMYK")
4249   - {
4250   - components = 4;
4251   - cs = JCS_CMYK;
4252   - }
4253   - else
4254   - {
4255   - QTC::TC("qpdf", "qpdf image optimize colorspace");
4256   - if (o.verbose && (! description.empty()))
4257   - {
4258   - std::cout << whoami << ": " << description
4259   - << ": not optimizing because qpdf can't optimize"
4260   - << " images with this colorspace" << std::endl;
4261   - }
4262   - return result;
4263   - }
4264   - if (((o.oi_min_width > 0) && (w <= o.oi_min_width)) ||
4265   - ((o.oi_min_height > 0) && (h <= o.oi_min_height)) ||
4266   - ((o.oi_min_area > 0) && ((w * h) <= o.oi_min_area)))
4267   - {
4268   - QTC::TC("qpdf", "qpdf image optimize too small");
4269   - if (o.verbose && (! description.empty()))
4270   - {
4271   - std::cout << whoami << ": " << description
4272   - << ": not optimizing because image"
4273   - << " is smaller than requested minimum dimensions"
4274   - << std::endl;
4275   - }
4276   - return result;
4277   - }
4278   -
4279   - result = new Pl_DCT("jpg", next, w, h, components, cs);
4280   - return result;
4281   -}
4282   -
4283   -bool
4284   -ImageOptimizer::evaluate(std::string const& description)
4285   -{
4286   - if (! image.pipeStreamData(0, 0, qpdf_dl_specialized, true))
4287   - {
4288   - QTC::TC("qpdf", "qpdf image optimize no pipeline");
4289   - if (o.verbose)
4290   - {
4291   - std::cout << whoami << ": " << description
4292   - << ": not optimizing because unable to decode data"
4293   - << " or data already uses DCT"
4294   - << std::endl;
4295   - }
4296   - return false;
4297   - }
4298   - Pl_Discard d;
4299   - Pl_Count c("count", &d);
4300   - PointerHolder<Pipeline> p = makePipeline(description, &c);
4301   - if (p.getPointer() == 0)
4302   - {
4303   - // message issued by makePipeline
4304   - return false;
4305   - }
4306   - if (! image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized))
4307   - {
4308   - return false;
4309   - }
4310   - long long orig_length = image.getDict().getKey("/Length").getIntValue();
4311   - if (c.getCount() >= orig_length)
4312   - {
4313   - QTC::TC("qpdf", "qpdf image optimize no shrink");
4314   - if (o.verbose)
4315   - {
4316   - std::cout << whoami << ": " << description
4317   - << ": not optimizing because DCT compression does not"
4318   - << " reduce image size" << std::endl;
4319   - }
4320   - return false;
4321   - }
4322   - if (o.verbose)
4323   - {
4324   - std::cout << whoami << ": " << description
4325   - << ": optimizing image reduces size from "
4326   - << orig_length << " to " << c.getCount()
4327   - << std::endl;
4328   - }
4329   - return true;
4330   -}
4331   -
4332   -void
4333   -ImageOptimizer::provideStreamData(int, int, Pipeline* pipeline)
4334   -{
4335   - PointerHolder<Pipeline> p = makePipeline("", pipeline);
4336   - if (p.getPointer() == 0)
4337   - {
4338   - // Should not be possible
4339   - image.warnIfPossible("unable to create pipeline after previous"
4340   - " success; image data will be lost");
4341   - pipeline->finish();
4342   - return;
4343   - }
4344   - image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized,
4345   - false, false);
4346   -}
4347   -
4348   -template <typename T>
4349   -static PointerHolder<QPDF> do_process_once(
4350   - void (QPDF::*fn)(T, char const*),
4351   - T item, char const* password,
4352   - Options& o, bool empty)
4353   -{
4354   - PointerHolder<QPDF> pdf = new QPDF;
4355   - set_qpdf_options(*pdf, o);
4356   - if (empty)
4357   - {
4358   - pdf->emptyPDF();
4359   - }
4360   - else
4361   - {
4362   - ((*pdf).*fn)(item, password);
4363   - }
4364   - return pdf;
4365   -}
4366   -
4367   -template <typename T>
4368   -static PointerHolder<QPDF> do_process(
4369   - void (QPDF::*fn)(T, char const*),
4370   - T item, char const* password,
4371   - Options& o, bool empty)
4372   -{
4373   - // If a password has been specified but doesn't work, try other
4374   - // passwords that are equivalent in different character encodings.
4375   - // This makes it possible to open PDF files that were encrypted
4376   - // using incorrect string encodings. For example, if someone used
4377   - // a password encoded in PDF Doc encoding or Windows code page
4378   - // 1252 for an AES-encrypted file or a UTF-8-encoded password on
4379   - // an RC4-encrypted file, or if the password was properly encoded
4380   - // by the password given here was incorrectly encoded, there's a
4381   - // good chance we'd succeed here.
4382   -
4383   - std::string ptemp;
4384   - if (password && (! o.password_is_hex_key))
4385   - {
4386   - if (o.password_mode == pm_hex_bytes)
4387   - {
4388   - // Special case: handle --password-mode=hex-bytes for input
4389   - // password as well as output password
4390   - QTC::TC("qpdf", "qpdf input password hex-bytes");
4391   - ptemp = QUtil::hex_decode(password);
4392   - password = ptemp.c_str();
4393   - }
4394   - }
4395   - if ((password == 0) || empty || o.password_is_hex_key ||
4396   - o.suppress_password_recovery)
4397   - {
4398   - // There is no password, or we're not doing recovery, so just
4399   - // do the normal processing with the supplied password.
4400   - return do_process_once(fn, item, password, o, empty);
4401   - }
4402   -
4403   - // Get a list of otherwise encoded strings. Keep in scope for this
4404   - // method.
4405   - std::vector<std::string> passwords_str =
4406   - QUtil::possible_repaired_encodings(password);
4407   - // Represent to char const*, as required by the QPDF class.
4408   - std::vector<char const*> passwords;
4409   - for (std::vector<std::string>::iterator iter = passwords_str.begin();
4410   - iter != passwords_str.end(); ++iter)
4411   - {
4412   - passwords.push_back((*iter).c_str());
4413   - }
4414   - // We always try the supplied password first because it is the
4415   - // first string returned by possible_repaired_encodings. If there
4416   - // is more than one option, go ahead and put the supplied password
4417   - // at the end so that it's that decoding attempt whose exception
4418   - // is thrown.
4419   - if (passwords.size() > 1)
4420   - {
4421   - passwords.push_back(password);
4422   - }
4423   -
4424   - // Try each password. If one works, return the resulting object.
4425   - // If they all fail, throw the exception thrown by the final
4426   - // attempt, which, like the first attempt, will be with the
4427   - // supplied password.
4428   - bool warned = false;
4429   - for (std::vector<char const*>::iterator iter = passwords.begin();
4430   - iter != passwords.end(); ++iter)
4431   - {
4432   - try
4433   - {
4434   - return do_process_once(fn, item, *iter, o, empty);
4435   - }
4436   - catch (QPDFExc& e)
4437   - {
4438   - std::vector<char const*>::iterator next = iter;
4439   - ++next;
4440   - if (next == passwords.end())
4441   - {
4442   - throw e;
4443   - }
4444   - }
4445   - if ((! warned) && o.verbose)
4446   - {
4447   - warned = true;
4448   - std::cout << whoami << ": supplied password didn't work;"
4449   - << " trying other passwords based on interpreting"
4450   - << " password with different string encodings"
4451   - << std::endl;
4452   - }
4453   - }
4454   - // Should not be reachable
4455   - throw std::logic_error("do_process returned");
4456   -}
4457   -
4458   -static PointerHolder<QPDF> process_file(char const* filename,
4459   - char const* password,
4460   - Options& o)
4461   -{
4462   - return do_process(&QPDF::processFile, filename, password, o,
4463   - strcmp(filename, "") == 0);
4464   -}
4465   -
4466   -static PointerHolder<QPDF> process_input_source(
4467   - PointerHolder<InputSource> is, char const* password, Options& o)
4468   -{
4469   - return do_process(&QPDF::processInputSource, is, password, o, false);
4470   -}
4471   -
4472   -static void validate_under_overlay(QPDF& pdf, UnderOverlay* uo, Options& o)
4473   -{
4474   - if (0 == uo->filename)
4475   - {
4476   - return;
4477   - }
4478   - QPDFPageDocumentHelper main_pdh(pdf);
4479   - int main_npages = QIntC::to_int(main_pdh.getAllPages().size());
4480   - uo->pdf = process_file(uo->filename, uo->password, o);
4481   - QPDFPageDocumentHelper uo_pdh(*(uo->pdf));
4482   - int uo_npages = QIntC::to_int(uo_pdh.getAllPages().size());
4483   - try
4484   - {
4485   - uo->to_pagenos = QUtil::parse_numrange(uo->to_nr, main_npages);
4486   - }
4487   - catch (std::runtime_error& e)
4488   - {
4489   - usageExit("parsing numeric range for " + uo->which +
4490   - " \"to\" pages: " + e.what());
4491   - }
4492   - try
4493   - {
4494   - if (0 == strlen(uo->from_nr))
4495   - {
4496   - QTC::TC("qpdf", "qpdf from_nr from repeat_nr");
4497   - uo->from_nr = uo->repeat_nr;
4498   - }
4499   - uo->from_pagenos = QUtil::parse_numrange(uo->from_nr, uo_npages);
4500   - if (strlen(uo->repeat_nr))
4501   - {
4502   - uo->repeat_pagenos =
4503   - QUtil::parse_numrange(uo->repeat_nr, uo_npages);
4504   - }
4505   - }
4506   - catch (std::runtime_error& e)
4507   - {
4508   - usageExit("parsing numeric range for " + uo->which + " file " +
4509   - uo->filename + ": " + e.what());
4510   - }
4511   -}
4512   -
4513   -static void get_uo_pagenos(UnderOverlay& uo,
4514   - std::map<int, std::vector<int> >& pagenos)
4515   -{
4516   - size_t idx = 0;
4517   - size_t from_size = uo.from_pagenos.size();
4518   - size_t repeat_size = uo.repeat_pagenos.size();
4519   - for (std::vector<int>::iterator iter = uo.to_pagenos.begin();
4520   - iter != uo.to_pagenos.end(); ++iter, ++idx)
4521   - {
4522   - if (idx < from_size)
4523   - {
4524   - pagenos[*iter].push_back(uo.from_pagenos.at(idx));
4525   - }
4526   - else if (repeat_size)
4527   - {
4528   - pagenos[*iter].push_back(
4529   - uo.repeat_pagenos.at((idx - from_size) % repeat_size));
4530   - }
4531   - }
4532   -}
4533   -
4534   -static QPDFAcroFormDocumentHelper* get_afdh_for_qpdf(
4535   - std::map<unsigned long long,
4536   - PointerHolder<QPDFAcroFormDocumentHelper>>& afdh_map,
4537   - QPDF* q)
4538   -{
4539   - auto uid = q->getUniqueId();
4540   - if (! afdh_map.count(uid))
4541   - {
4542   - afdh_map[uid] = new QPDFAcroFormDocumentHelper(*q);
4543   - }
4544   - return afdh_map[uid].getPointer();
4545   -}
4546   -
4547   -static void do_under_overlay_for_page(
4548   - QPDF& pdf,
4549   - Options& o,
4550   - UnderOverlay& uo,
4551   - std::map<int, std::vector<int> >& pagenos,
4552   - size_t page_idx,
4553   - std::map<int, QPDFObjectHandle>& fo,
4554   - std::vector<QPDFPageObjectHelper>& pages,
4555   - QPDFPageObjectHelper& dest_page,
4556   - bool before)
4557   -{
4558   - int pageno = 1 + QIntC::to_int(page_idx);
4559   - if (! pagenos.count(pageno))
4560   - {
4561   - return;
4562   - }
4563   -
4564   - std::map<unsigned long long,
4565   - PointerHolder<QPDFAcroFormDocumentHelper>> afdh;
4566   - auto make_afdh = [&](QPDFPageObjectHelper& ph) {
4567   - QPDF* q = ph.getObjectHandle().getOwningQPDF();
4568   - return get_afdh_for_qpdf(afdh, q);
4569   - };
4570   - auto dest_afdh = make_afdh(dest_page);
4571   -
4572   - std::string content;
4573   - int min_suffix = 1;
4574   - QPDFObjectHandle resources = dest_page.getAttribute("/Resources", true);
4575   - if (! resources.isDictionary())
4576   - {
4577   - QTC::TC("qpdf", "qpdf overlay page with no resources");
4578   - resources = QPDFObjectHandle::newDictionary();
4579   - dest_page.getObjectHandle().replaceKey("/Resources", resources);
4580   - }
4581   - for (std::vector<int>::iterator iter = pagenos[pageno].begin();
4582   - iter != pagenos[pageno].end(); ++iter)
4583   - {
4584   - int from_pageno = *iter;
4585   - if (o.verbose)
4586   - {
4587   - std::cout << " " << uo.which << " " << from_pageno << std::endl;
4588   - }
4589   - auto from_page = pages.at(QIntC::to_size(from_pageno - 1));
4590   - if (0 == fo.count(from_pageno))
4591   - {
4592   - fo[from_pageno] =
4593   - pdf.copyForeignObject(
4594   - from_page.getFormXObjectForPage());
4595   - }
4596   -
4597   - // If the same page is overlaid or underlaid multiple times,
4598   - // we'll generate multiple names for it, but that's harmless
4599   - // and also a pretty goofy case that's not worth coding
4600   - // around.
4601   - std::string name = resources.getUniqueResourceName("/Fx", min_suffix);
4602   - QPDFMatrix cm;
4603   - std::string new_content = dest_page.placeFormXObject(
4604   - fo[from_pageno], name,
4605   - dest_page.getTrimBox().getArrayAsRectangle(), cm);
4606   - dest_page.copyAnnotations(
4607   - from_page, cm, dest_afdh, make_afdh(from_page));
4608   - if (! new_content.empty())
4609   - {
4610   - resources.mergeResources(
4611   - QPDFObjectHandle::parse("<< /XObject << >> >>"));
4612   - auto xobject = resources.getKey("/XObject");
4613   - if (xobject.isDictionary())
4614   - {
4615   - xobject.replaceKey(name, fo[from_pageno]);
4616   - }
4617   - ++min_suffix;
4618   - content += new_content;
4619   - }
4620   - }
4621   - if (! content.empty())
4622   - {
4623   - if (before)
4624   - {
4625   - dest_page.addPageContents(
4626   - QPDFObjectHandle::newStream(&pdf, content), true);
4627   - }
4628   - else
4629   - {
4630   - dest_page.addPageContents(
4631   - QPDFObjectHandle::newStream(&pdf, "q\n"), true);
4632   - dest_page.addPageContents(
4633   - QPDFObjectHandle::newStream(&pdf, "\nQ\n" + content), false);
4634   - }
4635   - }
4636   -}
4637   -
4638   -static void handle_under_overlay(QPDF& pdf, Options& o)
4639   -{
4640   - validate_under_overlay(pdf, &o.underlay, o);
4641   - validate_under_overlay(pdf, &o.overlay, o);
4642   - if ((0 == o.underlay.pdf.getPointer()) &&
4643   - (0 == o.overlay.pdf.getPointer()))
4644   - {
4645   - return;
4646   - }
4647   - std::map<int, std::vector<int> > underlay_pagenos;
4648   - get_uo_pagenos(o.underlay, underlay_pagenos);
4649   - std::map<int, std::vector<int> > overlay_pagenos;
4650   - get_uo_pagenos(o.overlay, overlay_pagenos);
4651   - std::map<int, QPDFObjectHandle> underlay_fo;
4652   - std::map<int, QPDFObjectHandle> overlay_fo;
4653   - std::vector<QPDFPageObjectHelper> upages;
4654   - if (o.underlay.pdf.getPointer())
4655   - {
4656   - upages = QPDFPageDocumentHelper(*(o.underlay.pdf)).getAllPages();
4657   - }
4658   - std::vector<QPDFPageObjectHelper> opages;
4659   - if (o.overlay.pdf.getPointer())
4660   - {
4661   - opages = QPDFPageDocumentHelper(*(o.overlay.pdf)).getAllPages();
4662   - }
4663   -
4664   - QPDFPageDocumentHelper main_pdh(pdf);
4665   - std::vector<QPDFPageObjectHelper> main_pages = main_pdh.getAllPages();
4666   - size_t main_npages = main_pages.size();
4667   - if (o.verbose)
4668   - {
4669   - std::cout << whoami << ": processing underlay/overlay" << std::endl;
4670   - }
4671   - for (size_t i = 0; i < main_npages; ++i)
4672   - {
4673   - if (o.verbose)
4674   - {
4675   - std::cout << " page " << 1+i << std::endl;
4676   - }
4677   - do_under_overlay_for_page(pdf, o, o.underlay, underlay_pagenos, i,
4678   - underlay_fo, upages, main_pages.at(i),
4679   - true);
4680   - do_under_overlay_for_page(pdf, o, o.overlay, overlay_pagenos, i,
4681   - overlay_fo, opages, main_pages.at(i),
4682   - false);
4683   - }
4684   -}
4685   -
4686   -static void maybe_set_pagemode(QPDF& pdf, std::string const& pagemode)
4687   -{
4688   - auto root = pdf.getRoot();
4689   - if (root.getKey("/PageMode").isNull())
4690   - {
4691   - root.replaceKey("/PageMode", QPDFObjectHandle::newName(pagemode));
4692   - }
4693   -}
4694   -
4695   -static void add_attachments(QPDF& pdf, Options& o, int& exit_code)
4696   -{
4697   - maybe_set_pagemode(pdf, "/UseAttachments");
4698   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
4699   - for (auto const& to_add: o.attachments_to_add)
4700   - {
4701   - if ((! to_add.replace) && efdh.getEmbeddedFile(to_add.key))
4702   - {
4703   - std::cerr << whoami << ": " << pdf.getFilename()
4704   - << " already has an attachment with key = "
4705   - << to_add.key << "; use --replace to replace"
4706   - << " or --key to specify a different key"
4707   - << std::endl;
4708   - exit_code = EXIT_ERROR;
4709   - continue;
4710   - }
4711   -
4712   - auto fs = QPDFFileSpecObjectHelper::createFileSpec(
4713   - pdf, to_add.filename, to_add.path);
4714   - if (! to_add.description.empty())
4715   - {
4716   - fs.setDescription(to_add.description);
4717   - }
4718   - auto efs = QPDFEFStreamObjectHelper(fs.getEmbeddedFileStream());
4719   - efs.setCreationDate(to_add.creationdate)
4720   - .setModDate(to_add.moddate);
4721   - if (! to_add.mimetype.empty())
4722   - {
4723   - efs.setSubtype(to_add.mimetype);
4724   - }
4725   -
4726   - efdh.replaceEmbeddedFile(to_add.key, fs);
4727   - if (o.verbose)
4728   - {
4729   - std::cout << whoami << ": attached " << to_add.path
4730   - << " as " << to_add.filename
4731   - << " with key " << to_add.key << std::endl;
4732   - }
4733   - }
4734   -}
4735   -
4736   -static void copy_attachments(QPDF& pdf, Options& o, int& exit_code)
4737   -{
4738   - maybe_set_pagemode(pdf, "/UseAttachments");
4739   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
4740   - for (auto const& to_copy: o.attachments_to_copy)
4741   - {
4742   - auto other = process_file(
4743   - to_copy.path.c_str(), to_copy.password.c_str(), o);
4744   - QPDFEmbeddedFileDocumentHelper other_efdh(*other);
4745   - auto other_attachments = other_efdh.getEmbeddedFiles();
4746   - for (auto const& iter: other_attachments)
4747   - {
4748   - if (o.verbose)
4749   - {
4750   - std::cout << whoami << ": copying attachments from "
4751   - << to_copy.path << std::endl;
4752   - }
4753   - std::string new_key = to_copy.prefix + iter.first;
4754   - if (efdh.getEmbeddedFile(new_key))
4755   - {
4756   - exit_code = EXIT_ERROR;
4757   - std::cerr << whoami << to_copy.path << " and "
4758   - << pdf.getFilename()
4759   - << " both have attachments with key " << new_key
4760   - << "; use --prefix with --copy-attachments-from"
4761   - << " or manually copy individual attachments"
4762   - << std::endl;
4763   - }
4764   - else
4765   - {
4766   - auto new_fs_oh = pdf.copyForeignObject(
4767   - iter.second->getObjectHandle());
4768   - efdh.replaceEmbeddedFile(
4769   - new_key, QPDFFileSpecObjectHelper(new_fs_oh));
4770   - if (o.verbose)
4771   - {
4772   - std::cout << " " << iter.first << " -> " << new_key
4773   - << std::endl;
4774   - }
4775   - }
4776   - }
4777   -
4778   - if ((other->anyWarnings()) && (exit_code == 0))
4779   - {
4780   - exit_code = EXIT_WARNING;
4781   - }
4782   - }
4783   -}
4784   -
4785   -static void handle_transformations(QPDF& pdf, Options& o, int& exit_code)
4786   -{
4787   - QPDFPageDocumentHelper dh(pdf);
4788   - PointerHolder<QPDFAcroFormDocumentHelper> afdh;
4789   - auto make_afdh = [&]() {
4790   - if (! afdh.getPointer())
4791   - {
4792   - afdh = new QPDFAcroFormDocumentHelper(pdf);
4793   - }
4794   - };
4795   - if (o.externalize_inline_images)
4796   - {
4797   - std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
4798   - for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
4799   - iter != pages.end(); ++iter)
4800   - {
4801   - QPDFPageObjectHelper& ph(*iter);
4802   - ph.externalizeInlineImages(o.ii_min_bytes);
4803   - }
4804   - }
4805   - if (o.optimize_images)
4806   - {
4807   - int pageno = 0;
4808   - std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
4809   - for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
4810   - iter != pages.end(); ++iter)
4811   - {
4812   - ++pageno;
4813   - QPDFPageObjectHelper& ph(*iter);
4814   - QPDFObjectHandle page = ph.getObjectHandle();
4815   - std::map<std::string, QPDFObjectHandle> images = ph.getImages();
4816   - for (auto& iter2: images)
4817   - {
4818   - std::string name = iter2.first;
4819   - QPDFObjectHandle& image = iter2.second;
4820   - ImageOptimizer* io = new ImageOptimizer(o, image);
4821   - PointerHolder<QPDFObjectHandle::StreamDataProvider> sdp(io);
4822   - if (io->evaluate("image " + name + " on page " +
4823   - QUtil::int_to_string(pageno)))
4824   - {
4825   - QPDFObjectHandle new_image =
4826   - QPDFObjectHandle::newStream(&pdf);
4827   - new_image.replaceDict(image.getDict().shallowCopy());
4828   - new_image.replaceStreamData(
4829   - sdp,
4830   - QPDFObjectHandle::newName("/DCTDecode"),
4831   - QPDFObjectHandle::newNull());
4832   - ph.getAttribute("/Resources", true).
4833   - getKey("/XObject").replaceKey(
4834   - name, new_image);
4835   - }
4836   - }
4837   - }
4838   - }
4839   - if (o.generate_appearances)
4840   - {
4841   - make_afdh();
4842   - afdh->generateAppearancesIfNeeded();
4843   - }
4844   - if (o.flatten_annotations)
4845   - {
4846   - dh.flattenAnnotations(o.flatten_annotations_required,
4847   - o.flatten_annotations_forbidden);
4848   - }
4849   - if (o.coalesce_contents)
4850   - {
4851   - std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
4852   - for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
4853   - iter != pages.end(); ++iter)
4854   - {
4855   - (*iter).coalesceContentStreams();
4856   - }
4857   - }
4858   - if (o.flatten_rotation)
4859   - {
4860   - make_afdh();
4861   - for (auto& page: dh.getAllPages())
4862   - {
4863   - page.flattenRotation(afdh.getPointer());
4864   - }
4865   - }
4866   - if (o.remove_page_labels)
4867   - {
4868   - pdf.getRoot().removeKey("/PageLabels");
4869   - }
4870   - if (! o.attachments_to_remove.empty())
4871   - {
4872   - QPDFEmbeddedFileDocumentHelper efdh(pdf);
4873   - for (auto const& key: o.attachments_to_remove)
4874   - {
4875   - if (efdh.removeEmbeddedFile(key))
4876   - {
4877   - if (o.verbose)
4878   - {
4879   - std::cout << whoami <<
4880   - ": removed attachment " << key << std::endl;
4881   - }
4882   - }
4883   - else
4884   - {
4885   - std::cerr << whoami <<
4886   - ": attachment " << key << " not found" << std::endl;
4887   - exit_code = EXIT_ERROR;
4888   - }
4889   - }
4890   - }
4891   - if (! o.attachments_to_add.empty())
4892   - {
4893   - add_attachments(pdf, o, exit_code);
4894   - }
4895   - if (! o.attachments_to_copy.empty())
4896   - {
4897   - copy_attachments(pdf, o, exit_code);
4898   - }
4899   -}
4900   -
4901   -static bool should_remove_unreferenced_resources(QPDF& pdf, Options& o)
4902   -{
4903   - if (o.remove_unreferenced_page_resources == re_no)
4904   - {
4905   - return false;
4906   - }
4907   - else if (o.remove_unreferenced_page_resources == re_yes)
4908   - {
4909   - return true;
4910   - }
4911   -
4912   - // Unreferenced resources are common in files where resources
4913   - // dictionaries are shared across pages. As a heuristic, we look
4914   - // in the file for shared resources dictionaries or shared XObject
4915   - // subkeys of resources dictionaries either on pages or on form
4916   - // XObjects in pages. If we find any, then there is a higher
4917   - // likelihood that the expensive process of finding unreferenced
4918   - // resources is worth it.
4919   -
4920   - // Return true as soon as we find any shared resources.
4921   -
4922   - std::set<QPDFObjGen> resources_seen; // shared resources detection
4923   - std::set<QPDFObjGen> nodes_seen; // loop detection
4924   -
4925   - if (o.verbose)
4926   - {
4927   - std::cout << whoami << ": " << pdf.getFilename()
4928   - << ": checking for shared resources" << std::endl;
4929   - }
4930   -
4931   - std::list<QPDFObjectHandle> queue;
4932   - queue.push_back(pdf.getRoot().getKey("/Pages"));
4933   - while (! queue.empty())
4934   - {
4935   - QPDFObjectHandle node = *queue.begin();
4936   - queue.pop_front();
4937   - QPDFObjGen og = node.getObjGen();
4938   - if (nodes_seen.count(og))
4939   - {
4940   - continue;
4941   - }
4942   - nodes_seen.insert(og);
4943   - QPDFObjectHandle dict = node.isStream() ? node.getDict() : node;
4944   - QPDFObjectHandle kids = dict.getKey("/Kids");
4945   - if (kids.isArray())
4946   - {
4947   - // This is a non-leaf node.
4948   - if (dict.hasKey("/Resources"))
4949   - {
4950   - QTC::TC("qpdf", "qpdf found resources in non-leaf");
4951   - if (o.verbose)
4952   - {
4953   - std::cout << " found resources in non-leaf page node "
4954   - << og.getObj() << " " << og.getGen()
4955   - << std::endl;
4956   - }
4957   - return true;
4958   - }
4959   - int n = kids.getArrayNItems();
4960   - for (int i = 0; i < n; ++i)
4961   - {
4962   - queue.push_back(kids.getArrayItem(i));
4963   - }
4964   - }
4965   - else
4966   - {
4967   - // This is a leaf node or a form XObject.
4968   - QPDFObjectHandle resources = dict.getKey("/Resources");
4969   - if (resources.isIndirect())
4970   - {
4971   - QPDFObjGen resources_og = resources.getObjGen();
4972   - if (resources_seen.count(resources_og))
4973   - {
4974   - QTC::TC("qpdf", "qpdf found shared resources in leaf");
4975   - if (o.verbose)
4976   - {
4977   - std::cout << " found shared resources in leaf node "
4978   - << og.getObj() << " " << og.getGen()
4979   - << ": "
4980   - << resources_og.getObj() << " "
4981   - << resources_og.getGen()
4982   - << std::endl;
4983   - }
4984   - return true;
4985   - }
4986   - resources_seen.insert(resources_og);
4987   - }
4988   - QPDFObjectHandle xobject = (resources.isDictionary() ?
4989   - resources.getKey("/XObject") :
4990   - QPDFObjectHandle::newNull());
4991   - if (xobject.isIndirect())
4992   - {
4993   - QPDFObjGen xobject_og = xobject.getObjGen();
4994   - if (resources_seen.count(xobject_og))
4995   - {
4996   - QTC::TC("qpdf", "qpdf found shared xobject in leaf");
4997   - if (o.verbose)
4998   - {
4999   - std::cout << " found shared xobject in leaf node "
5000   - << og.getObj() << " " << og.getGen()
5001   - << ": "
5002   - << xobject_og.getObj() << " "
5003   - << xobject_og.getGen()
5004   - << std::endl;
5005   - }
5006   - return true;
5007   - }
5008   - resources_seen.insert(xobject_og);
5009   - }
5010   - if (xobject.isDictionary())
5011   - {
5012   - for (auto const& k: xobject.getKeys())
5013   - {
5014   - QPDFObjectHandle xobj = xobject.getKey(k);
5015   - if (xobj.isStream() &&
5016   - xobj.getDict().getKey("/Type").isName() &&
5017   - ("/XObject" ==
5018   - xobj.getDict().getKey("/Type").getName()) &&
5019   - xobj.getDict().getKey("/Subtype").isName() &&
5020   - ("/Form" ==
5021   - xobj.getDict().getKey("/Subtype").getName()))
5022   - {
5023   - queue.push_back(xobj);
5024   - }
5025   - }
5026   - }
5027   - }
5028   - }
5029   -
5030   - if (o.verbose)
5031   - {
5032   - std::cout << whoami << ": no shared resources found" << std::endl;
5033   - }
5034   - return false;
5035   -}
5036   -
5037   -static QPDFObjectHandle added_page(QPDF& pdf, QPDFObjectHandle page)
5038   -{
5039   - QPDFObjectHandle result = page;
5040   - if (page.getOwningQPDF() != &pdf)
5041   - {
5042   - // Calling copyForeignObject on an object we already copied
5043   - // will give us the already existing copy.
5044   - result = pdf.copyForeignObject(page);
5045   - }
5046   - return result;
5047   -}
5048   -
5049   -static QPDFObjectHandle added_page(QPDF& pdf, QPDFPageObjectHelper page)
5050   -{
5051   - return added_page(pdf, page.getObjectHandle());
5052   -}
5053   -
5054   -static void handle_page_specs(
5055   - QPDF& pdf, Options& o, bool& warnings,
5056   - std::vector<PointerHolder<QPDF>>& page_heap)
5057   -{
5058   - // Parse all page specifications and translate them into lists of
5059   - // actual pages.
5060   -
5061   - // Handle "." as a shortcut for the input file
5062   - for (std::vector<PageSpec>::iterator iter = o.page_specs.begin();
5063   - iter != o.page_specs.end(); ++iter)
5064   - {
5065   - PageSpec& page_spec = *iter;
5066   - if (page_spec.filename == ".")
5067   - {
5068   - page_spec.filename = o.infilename;
5069   - }
5070   - }
5071   -
5072   - if (! o.keep_files_open_set)
5073   - {
5074   - // Count the number of distinct files to determine whether we
5075   - // should keep files open or not. Rather than trying to code
5076   - // some portable heuristic based on OS limits, just hard-code
5077   - // this at a given number and allow users to override.
5078   - std::set<std::string> filenames;
5079   - for (std::vector<PageSpec>::iterator iter = o.page_specs.begin();
5080   - iter != o.page_specs.end(); ++iter)
5081   - {
5082   - PageSpec& page_spec = *iter;
5083   - filenames.insert(page_spec.filename);
5084   - }
5085   - if (filenames.size() > o.keep_files_open_threshold)
5086   - {
5087   - QTC::TC("qpdf", "qpdf disable keep files open");
5088   - if (o.verbose)
5089   - {
5090   - std::cout << whoami << ": selecting --keep-open-files=n"
5091   - << std::endl;
5092   - }
5093   - o.keep_files_open = false;
5094   - }
5095   - else
5096   - {
5097   - if (o.verbose)
5098   - {
5099   - std::cout << whoami << ": selecting --keep-open-files=y"
5100   - << std::endl;
5101   - }
5102   - o.keep_files_open = true;
5103   - QTC::TC("qpdf", "qpdf don't disable keep files open");
5104   - }
5105   - }
5106   -
5107   - // Create a QPDF object for each file that we may take pages from.
5108   - std::map<std::string, QPDF*> page_spec_qpdfs;
5109   - std::map<std::string, ClosedFileInputSource*> page_spec_cfis;
5110   - page_spec_qpdfs[o.infilename] = &pdf;
5111   - std::vector<QPDFPageData> parsed_specs;
5112   - std::map<unsigned long long, std::set<QPDFObjGen> > copied_pages;
5113   - for (std::vector<PageSpec>::iterator iter = o.page_specs.begin();
5114   - iter != o.page_specs.end(); ++iter)
5115   - {
5116   - PageSpec& page_spec = *iter;
5117   - if (page_spec_qpdfs.count(page_spec.filename) == 0)
5118   - {
5119   - // Open the PDF file and store the QPDF object. Throw a
5120   - // PointerHolder to the qpdf into a heap so that it
5121   - // survives through copying to the output but gets cleaned up
5122   - // automatically at the end. Do not canonicalize the file
5123   - // name. Using two different paths to refer to the same
5124   - // file is a document workaround for duplicating a page.
5125   - // If you are using this an example of how to do this with
5126   - // the API, you can just create two different QPDF objects
5127   - // to the same underlying file with the same path to
5128   - // achieve the same affect.
5129   - char const* password = page_spec.password;
5130   - if (o.encryption_file && (password == 0) &&
5131   - (page_spec.filename == o.encryption_file))
5132   - {
5133   - QTC::TC("qpdf", "qpdf pages encryption password");
5134   - password = o.encryption_file_password;
5135   - }
5136   - if (o.verbose)
5137   - {
5138   - std::cout << whoami << ": processing "
5139   - << page_spec.filename << std::endl;
5140   - }
5141   - PointerHolder<InputSource> is;
5142   - ClosedFileInputSource* cis = 0;
5143   - if (! o.keep_files_open)
5144   - {
5145   - QTC::TC("qpdf", "qpdf keep files open n");
5146   - cis = new ClosedFileInputSource(page_spec.filename.c_str());
5147   - is = cis;
5148   - cis->stayOpen(true);
5149   - }
5150   - else
5151   - {
5152   - QTC::TC("qpdf", "qpdf keep files open y");
5153   - FileInputSource* fis = new FileInputSource();
5154   - is = fis;
5155   - fis->setFilename(page_spec.filename.c_str());
5156   - }
5157   - PointerHolder<QPDF> qpdf_ph = process_input_source(is, password, o);
5158   - page_heap.push_back(qpdf_ph);
5159   - page_spec_qpdfs[page_spec.filename] = qpdf_ph.getPointer();
5160   - if (cis)
5161   - {
5162   - cis->stayOpen(false);
5163   - page_spec_cfis[page_spec.filename] = cis;
5164   - }
5165   - }
5166   -
5167   - // Read original pages from the PDF, and parse the page range
5168   - // associated with this occurrence of the file.
5169   - parsed_specs.push_back(
5170   - QPDFPageData(page_spec.filename,
5171   - page_spec_qpdfs[page_spec.filename],
5172   - page_spec.range));
5173   - }
5174   -
5175   - std::map<unsigned long long, bool> remove_unreferenced;
5176   - if (o.remove_unreferenced_page_resources != re_no)
5177   - {
5178   - for (std::map<std::string, QPDF*>::iterator iter =
5179   - page_spec_qpdfs.begin();
5180   - iter != page_spec_qpdfs.end(); ++iter)
5181   - {
5182   - std::string const& filename = (*iter).first;
5183   - ClosedFileInputSource* cis = 0;
5184   - if (page_spec_cfis.count(filename))
5185   - {
5186   - cis = page_spec_cfis[filename];
5187   - cis->stayOpen(true);
5188   - }
5189   - QPDF& other(*((*iter).second));
5190   - auto other_uuid = other.getUniqueId();
5191   - if (remove_unreferenced.count(other_uuid) == 0)
5192   - {
5193   - remove_unreferenced[other_uuid] =
5194   - should_remove_unreferenced_resources(other, o);
5195   - }
5196   - if (cis)
5197   - {
5198   - cis->stayOpen(false);
5199   - }
5200   - }
5201   - }
5202   -
5203   - // Clear all pages out of the primary QPDF's pages tree but leave
5204   - // the objects in place in the file so they can be re-added
5205   - // without changing their object numbers. This enables other
5206   - // things in the original file, such as outlines, to continue to
5207   - // work.
5208   - if (o.verbose)
5209   - {
5210   - std::cout << whoami
5211   - << ": removing unreferenced pages from primary input"
5212   - << std::endl;
5213   - }
5214   - QPDFPageDocumentHelper dh(pdf);
5215   - std::vector<QPDFPageObjectHelper> orig_pages = dh.getAllPages();
5216   - for (std::vector<QPDFPageObjectHelper>::iterator iter =
5217   - orig_pages.begin();
5218   - iter != orig_pages.end(); ++iter)
5219   - {
5220   - dh.removePage(*iter);
5221   - }
5222   -
5223   - if (o.collate && (parsed_specs.size() > 1))
5224   - {
5225   - // Collate the pages by selecting one page from each spec in
5226   - // order. When a spec runs out of pages, stop selecting from
5227   - // it.
5228   - std::vector<QPDFPageData> new_parsed_specs;
5229   - size_t nspecs = parsed_specs.size();
5230   - size_t cur_page = 0;
5231   - bool got_pages = true;
5232   - while (got_pages)
5233   - {
5234   - got_pages = false;
5235   - for (size_t i = 0; i < nspecs; ++i)
5236   - {
5237   - QPDFPageData& page_data = parsed_specs.at(i);
5238   - for (size_t j = 0; j < o.collate; ++j)
5239   - {
5240   - if (cur_page + j < page_data.selected_pages.size())
5241   - {
5242   - got_pages = true;
5243   - new_parsed_specs.push_back(
5244   - QPDFPageData(
5245   - page_data,
5246   - page_data.selected_pages.at(cur_page + j)));
5247   - }
5248   - }
5249   - }
5250   - cur_page += o.collate;
5251   - }
5252   - parsed_specs = new_parsed_specs;
5253   - }
5254   -
5255   - // Add all the pages from all the files in the order specified.
5256   - // Keep track of any pages from the original file that we are
5257   - // selecting.
5258   - std::set<int> selected_from_orig;
5259   - std::vector<QPDFObjectHandle> new_labels;
5260   - bool any_page_labels = false;
5261   - int out_pageno = 0;
5262   - std::map<unsigned long long,
5263   - PointerHolder<QPDFAcroFormDocumentHelper>> afdh_map;
5264   - auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf);
5265   - std::set<QPDFObjGen> referenced_fields;
5266   - for (std::vector<QPDFPageData>::iterator iter =
5267   - parsed_specs.begin();
5268   - iter != parsed_specs.end(); ++iter)
5269   - {
5270   - QPDFPageData& page_data = *iter;
5271   - ClosedFileInputSource* cis = 0;
5272   - if (page_spec_cfis.count(page_data.filename))
5273   - {
5274   - cis = page_spec_cfis[page_data.filename];
5275   - cis->stayOpen(true);
5276   - }
5277   - QPDFPageLabelDocumentHelper pldh(*page_data.qpdf);
5278   - auto other_afdh = get_afdh_for_qpdf(afdh_map, page_data.qpdf);
5279   - if (pldh.hasPageLabels())
5280   - {
5281   - any_page_labels = true;
5282   - }
5283   - if (o.verbose)
5284   - {
5285   - std::cout << whoami << ": adding pages from "
5286   - << page_data.filename << std::endl;
5287   - }
5288   - for (std::vector<int>::iterator pageno_iter =
5289   - page_data.selected_pages.begin();
5290   - pageno_iter != page_data.selected_pages.end();
5291   - ++pageno_iter, ++out_pageno)
5292   - {
5293   - // Pages are specified from 1 but numbered from 0 in the
5294   - // vector
5295   - int pageno = *pageno_iter - 1;
5296   - pldh.getLabelsForPageRange(pageno, pageno, out_pageno,
5297   - new_labels);
5298   - QPDFPageObjectHelper to_copy =
5299   - page_data.orig_pages.at(QIntC::to_size(pageno));
5300   - QPDFObjGen to_copy_og = to_copy.getObjectHandle().getObjGen();
5301   - unsigned long long from_uuid = page_data.qpdf->getUniqueId();
5302   - if (copied_pages[from_uuid].count(to_copy_og))
5303   - {
5304   - QTC::TC("qpdf", "qpdf copy same page more than once",
5305   - (page_data.qpdf == &pdf) ? 0 : 1);
5306   - to_copy = to_copy.shallowCopyPage();
5307   - }
5308   - else
5309   - {
5310   - copied_pages[from_uuid].insert(to_copy_og);
5311   - if (remove_unreferenced[from_uuid])
5312   - {
5313   - to_copy.removeUnreferencedResources();
5314   - }
5315   - }
5316   - dh.addPage(to_copy, false);
5317   - bool first_copy_from_orig = false;
5318   - bool this_file = (page_data.qpdf == &pdf);
5319   - if (this_file)
5320   - {
5321   - // This is a page from the original file. Keep track
5322   - // of the fact that we are using it.
5323   - first_copy_from_orig = (selected_from_orig.count(pageno) == 0);
5324   - selected_from_orig.insert(pageno);
5325   - }
5326   - auto new_page = added_page(pdf, to_copy);
5327   - // Try to avoid gratuitously renaming fields. In the case
5328   - // of where we're just extracting a bunch of pages from
5329   - // the original file and not copying any page more than
5330   - // once, there's no reason to do anything with the fields.
5331   - // Since we don't remove fields from the original file
5332   - // until all copy operations are completed, any foreign
5333   - // pages that conflict with original pages will be
5334   - // adjusted. If we copy any page from the original file
5335   - // more than once, that page would be in conflict with the
5336   - // previous copy of itself.
5337   - if (other_afdh->hasAcroForm() &&
5338   - ((! this_file) || (! first_copy_from_orig)))
5339   - {
5340   - if (! this_file)
5341   - {
5342   - QTC::TC("qpdf", "qpdf copy fields not this file");
5343   - }
5344   - else if (! first_copy_from_orig)
5345   - {
5346   - QTC::TC("qpdf", "qpdf copy fields non-first from orig");
5347   - }
5348   - try
5349   - {
5350   - this_afdh->fixCopiedAnnotations(
5351   - new_page, to_copy.getObjectHandle(), *other_afdh,
5352   - &referenced_fields);
5353   - }
5354   - catch (std::exception& e)
5355   - {
5356   - pdf.warn(
5357   - QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(),
5358   - "", 0, "Exception caught while fixing copied"
5359   - " annotations. This may be a qpdf bug. " +
5360   - std::string("Exception: ") + e.what()));
5361   - }
5362   - }
5363   - }
5364   - if (page_data.qpdf->anyWarnings())
5365   - {
5366   - warnings = true;
5367   - }
5368   - if (cis)
5369   - {
5370   - cis->stayOpen(false);
5371   - }
5372   - }
5373   - if (any_page_labels)
5374   - {
5375   - QPDFObjectHandle page_labels =
5376   - QPDFObjectHandle::newDictionary();
5377   - page_labels.replaceKey(
5378   - "/Nums", QPDFObjectHandle::newArray(new_labels));
5379   - pdf.getRoot().replaceKey("/PageLabels", page_labels);
5380   - }
5381   -
5382   - // Delete page objects for unused page in primary. This prevents
5383   - // those objects from being preserved by being referred to from
5384   - // other places, such as the outlines dictionary. Also make sure
5385   - // we keep form fields from pages we preserved.
5386   - for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno)
5387   - {
5388   - auto page = orig_pages.at(pageno);
5389   - if (selected_from_orig.count(QIntC::to_int(pageno)))
5390   - {
5391   - for (auto field: this_afdh->getFormFieldsForPage(page))
5392   - {
5393   - QTC::TC("qpdf", "qpdf pages keeping field from original");
5394   - referenced_fields.insert(field.getObjectHandle().getObjGen());
5395   - }
5396   - }
5397   - else
5398   - {
5399   - pdf.replaceObject(
5400   - page.getObjectHandle().getObjGen(),
5401   - QPDFObjectHandle::newNull());
5402   - }
5403   - }
5404   - // Remove unreferenced form fields
5405   - if (this_afdh->hasAcroForm())
5406   - {
5407   - auto acroform = pdf.getRoot().getKey("/AcroForm");
5408   - auto fields = acroform.getKey("/Fields");
5409   - if (fields.isArray())
5410   - {
5411   - auto new_fields = QPDFObjectHandle::newArray();
5412   - if (fields.isIndirect())
5413   - {
5414   - new_fields = pdf.makeIndirectObject(new_fields);
5415   - }
5416   - for (auto const& field: fields.aitems())
5417   - {
5418   - if (referenced_fields.count(field.getObjGen()))
5419   - {
5420   - new_fields.appendItem(field);
5421   - }
5422   - }
5423   - if (new_fields.getArrayNItems() > 0)
5424   - {
5425   - QTC::TC("qpdf", "qpdf keep some fields in pages");
5426   - acroform.replaceKey("/Fields", new_fields);
5427   - }
5428   - else
5429   - {
5430   - QTC::TC("qpdf", "qpdf no more fields in pages");
5431   - pdf.getRoot().removeKey("/AcroForm");
5432   - }
5433   - }
5434   - }
5435   -}
5436   -
5437   -static void handle_rotations(QPDF& pdf, Options& o)
5438   -{
5439   - QPDFPageDocumentHelper dh(pdf);
5440   - std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
5441   - int npages = QIntC::to_int(pages.size());
5442   - for (std::map<std::string, RotationSpec>::iterator iter =
5443   - o.rotations.begin();
5444   - iter != o.rotations.end(); ++iter)
5445   - {
5446   - std::string const& range = (*iter).first;
5447   - RotationSpec const& rspec = (*iter).second;
5448   - // range has been previously validated
5449   - std::vector<int> to_rotate =
5450   - QUtil::parse_numrange(range.c_str(), npages);
5451   - for (std::vector<int>::iterator i2 = to_rotate.begin();
5452   - i2 != to_rotate.end(); ++i2)
5453   - {
5454   - int pageno = *i2 - 1;
5455   - if ((pageno >= 0) && (pageno < npages))
5456   - {
5457   - pages.at(QIntC::to_size(pageno)).rotatePage(
5458   - rspec.angle, rspec.relative);
5459   - }
5460   - }
5461   - }
5462   -}
5463   -
5464   -static void maybe_fix_write_password(int R, Options& o, std::string& password)
5465   -{
5466   - switch (o.password_mode)
5467   - {
5468   - case pm_bytes:
5469   - QTC::TC("qpdf", "qpdf password mode bytes");
5470   - break;
5471   -
5472   - case pm_hex_bytes:
5473   - QTC::TC("qpdf", "qpdf password mode hex-bytes");
5474   - password = QUtil::hex_decode(password);
5475   - break;
5476   -
5477   - case pm_unicode:
5478   - case pm_auto:
5479   - {
5480   - bool has_8bit_chars;
5481   - bool is_valid_utf8;
5482   - bool is_utf16;
5483   - QUtil::analyze_encoding(password,
5484   - has_8bit_chars,
5485   - is_valid_utf8,
5486   - is_utf16);
5487   - if (! has_8bit_chars)
5488   - {
5489   - return;
5490   - }
5491   - if (o.password_mode == pm_unicode)
5492   - {
5493   - if (! is_valid_utf8)
5494   - {
5495   - QTC::TC("qpdf", "qpdf password not unicode");
5496   - throw std::runtime_error(
5497   - "supplied password is not valid UTF-8");
5498   - }
5499   - if (R < 5)
5500   - {
5501   - std::string encoded;
5502   - if (! QUtil::utf8_to_pdf_doc(password, encoded))
5503   - {
5504   - QTC::TC("qpdf", "qpdf password not encodable");
5505   - throw std::runtime_error(
5506   - "supplied password cannot be encoded for"
5507   - " 40-bit or 128-bit encryption formats");
5508   - }
5509   - password = encoded;
5510   - }
5511   - }
5512   - else
5513   - {
5514   - if ((R < 5) && is_valid_utf8)
5515   - {
5516   - std::string encoded;
5517   - if (QUtil::utf8_to_pdf_doc(password, encoded))
5518   - {
5519   - QTC::TC("qpdf", "qpdf auto-encode password");
5520   - if (o.verbose)
5521   - {
5522   - std::cout
5523   - << whoami
5524   - << ": automatically converting Unicode"
5525   - << " password to single-byte encoding as"
5526   - << " required for 40-bit or 128-bit"
5527   - << " encryption" << std::endl;
5528   - }
5529   - password = encoded;
5530   - }
5531   - else
5532   - {
5533   - QTC::TC("qpdf", "qpdf bytes fallback warning");
5534   - std::cerr
5535   - << whoami << ": WARNING: "
5536   - << "supplied password looks like a Unicode"
5537   - << " password with characters not allowed in"
5538   - << " passwords for 40-bit and 128-bit encryption;"
5539   - << " most readers will not be able to open this"
5540   - << " file with the supplied password."
5541   - << " (Use --password-mode=bytes to suppress this"
5542   - << " warning and use the password anyway.)"
5543   - << std::endl;
5544   - }
5545   - }
5546   - else if ((R >= 5) && (! is_valid_utf8))
5547   - {
5548   - QTC::TC("qpdf", "qpdf invalid utf-8 in auto");
5549   - throw std::runtime_error(
5550   - "supplied password is not a valid Unicode password,"
5551   - " which is required for 256-bit encryption; to"
5552   - " really use this password, rerun with the"
5553   - " --password-mode=bytes option");
5554   - }
5555   - }
5556   - }
5557   - break;
5558   - }
5559   -}
5560   -
5561   -static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w)
5562   -{
5563   - int R = 0;
5564   - if (o.keylen == 40)
5565   - {
5566   - R = 2;
5567   - }
5568   - else if (o.keylen == 128)
5569   - {
5570   - if (o.force_V4 || o.cleartext_metadata || o.use_aes)
5571   - {
5572   - R = 4;
5573   - }
5574   - else
5575   - {
5576   - R = 3;
5577   - }
5578   - }
5579   - else if (o.keylen == 256)
5580   - {
5581   - if (o.force_R5)
5582   - {
5583   - R = 5;
5584   - }
5585   - else
5586   - {
5587   - R = 6;
5588   - }
5589   - }
5590   - else
5591   - {
5592   - throw std::logic_error("bad encryption keylen");
5593   - }
5594   - if ((R > 3) && (o.r3_accessibility == false))
5595   - {
5596   - std::cerr << whoami
5597   - << ": -accessibility=n is ignored for modern"
5598   - << " encryption formats" << std::endl;
5599   - }
5600   - maybe_fix_write_password(R, o, o.user_password);
5601   - maybe_fix_write_password(R, o, o.owner_password);
5602   - if ((R < 4) || ((R == 4) && (! o.use_aes)))
5603   - {
5604   - if (! o.allow_weak_crypto)
5605   - {
5606   - // Do not set exit code to EXIT_WARNING for this case as
5607   - // this does not reflect a potential problem with the
5608   - // input file.
5609   - QTC::TC("qpdf", "qpdf weak crypto warning");
5610   - std::cerr
5611   - << whoami
5612   - << ": writing a file with RC4, a weak cryptographic algorithm"
5613   - << std::endl
5614   - << "Please use 256-bit keys for better security."
5615   - << std::endl
5616   - << "Pass --allow-weak-crypto to suppress this warning."
5617   - << std::endl
5618   - << "This will become an error in a future version of qpdf."
5619   - << std::endl;
5620   - }
5621   - }
5622   - switch (R)
5623   - {
5624   - case 2:
5625   - w.setR2EncryptionParameters(
5626   - o.user_password.c_str(), o.owner_password.c_str(),
5627   - o.r2_print, o.r2_modify, o.r2_extract, o.r2_annotate);
5628   - break;
5629   - case 3:
5630   - w.setR3EncryptionParameters(
5631   - o.user_password.c_str(), o.owner_password.c_str(),
5632   - o.r3_accessibility, o.r3_extract,
5633   - o.r3_assemble, o.r3_annotate_and_form,
5634   - o.r3_form_filling, o.r3_modify_other,
5635   - o.r3_print);
5636   - break;
5637   - case 4:
5638   - w.setR4EncryptionParameters(
5639   - o.user_password.c_str(), o.owner_password.c_str(),
5640   - o.r3_accessibility, o.r3_extract,
5641   - o.r3_assemble, o.r3_annotate_and_form,
5642   - o.r3_form_filling, o.r3_modify_other,
5643   - o.r3_print, !o.cleartext_metadata, o.use_aes);
5644   - break;
5645   - case 5:
5646   - w.setR5EncryptionParameters(
5647   - o.user_password.c_str(), o.owner_password.c_str(),
5648   - o.r3_accessibility, o.r3_extract,
5649   - o.r3_assemble, o.r3_annotate_and_form,
5650   - o.r3_form_filling, o.r3_modify_other,
5651   - o.r3_print, !o.cleartext_metadata);
5652   - break;
5653   - case 6:
5654   - w.setR6EncryptionParameters(
5655   - o.user_password.c_str(), o.owner_password.c_str(),
5656   - o.r3_accessibility, o.r3_extract,
5657   - o.r3_assemble, o.r3_annotate_and_form,
5658   - o.r3_form_filling, o.r3_modify_other,
5659   - o.r3_print, !o.cleartext_metadata);
5660   - break;
5661   - default:
5662   - throw std::logic_error("bad encryption R value");
5663   - break;
5664   - }
5665   -}
5666   -
5667   -static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
5668   -{
5669   - if (o.compression_level >= 0)
5670   - {
5671   - Pl_Flate::setCompressionLevel(o.compression_level);
5672   - }
5673   - if (o.qdf_mode)
5674   - {
5675   - w.setQDFMode(true);
5676   - }
5677   - if (o.preserve_unreferenced_objects)
5678   - {
5679   - w.setPreserveUnreferencedObjects(true);
5680   - }
5681   - if (o.newline_before_endstream)
5682   - {
5683   - w.setNewlineBeforeEndstream(true);
5684   - }
5685   - if (o.normalize_set)
5686   - {
5687   - w.setContentNormalization(o.normalize);
5688   - }
5689   - if (o.stream_data_set)
5690   - {
5691   - w.setStreamDataMode(o.stream_data_mode);
5692   - }
5693   - if (o.compress_streams_set)
5694   - {
5695   - w.setCompressStreams(o.compress_streams);
5696   - }
5697   - if (o.recompress_flate_set)
5698   - {
5699   - w.setRecompressFlate(o.recompress_flate);
5700   - }
5701   - if (o.decode_level_set)
5702   - {
5703   - w.setDecodeLevel(o.decode_level);
5704   - }
5705   - if (o.decrypt)
5706   - {
5707   - w.setPreserveEncryption(false);
5708   - }
5709   - if (o.deterministic_id)
5710   - {
5711   - w.setDeterministicID(true);
5712   - }
5713   - if (o.static_id)
5714   - {
5715   - w.setStaticID(true);
5716   - }
5717   - if (o.static_aes_iv)
5718   - {
5719   - w.setStaticAesIV(true);
5720   - }
5721   - if (o.suppress_original_object_id)
5722   - {
5723   - w.setSuppressOriginalObjectIDs(true);
5724   - }
5725   - if (o.copy_encryption)
5726   - {
5727   - PointerHolder<QPDF> encryption_pdf =
5728   - process_file(
5729   - o.encryption_file, o.encryption_file_password, o);
5730   - w.copyEncryptionParameters(*encryption_pdf);
5731   - }
5732   - if (o.encrypt)
5733   - {
5734   - set_encryption_options(pdf, o, w);
5735   - }
5736   - if (o.linearize)
5737   - {
5738   - w.setLinearization(true);
5739   - }
5740   - if (! o.linearize_pass1.empty())
5741   - {
5742   - w.setLinearizationPass1Filename(o.linearize_pass1);
5743   - }
5744   - if (o.object_stream_set)
5745   - {
5746   - w.setObjectStreamMode(o.object_stream_mode);
5747   - }
5748   - if (! o.min_version.empty())
5749   - {
5750   - std::string version;
5751   - int extension_level = 0;
5752   - parse_version(o.min_version, version, extension_level);
5753   - w.setMinimumPDFVersion(version, extension_level);
5754   - }
5755   - if (! o.force_version.empty())
5756   - {
5757   - std::string version;
5758   - int extension_level = 0;
5759   - parse_version(o.force_version, version, extension_level);
5760   - w.forcePDFVersion(version, extension_level);
5761   - }
5762   - if (o.progress && o.outfilename)
5763   - {
5764   - w.registerProgressReporter(new ProgressReporter(o.outfilename));
5765   - }
5766   -}
5767   -
5768   -static void do_split_pages(QPDF& pdf, Options& o, bool& warnings)
5769   -{
5770   - // Generate output file pattern
5771   - std::string before;
5772   - std::string after;
5773   - size_t len = strlen(o.outfilename);
5774   - char* num_spot = strstr(const_cast<char*>(o.outfilename), "%d");
5775   - if (num_spot != 0)
5776   - {
5777   - QTC::TC("qpdf", "qpdf split-pages %d");
5778   - before = std::string(o.outfilename,
5779   - QIntC::to_size(num_spot - o.outfilename));
5780   - after = num_spot + 2;
5781   - }
5782   - else if ((len >= 4) &&
5783   - (QUtil::str_compare_nocase(
5784   - o.outfilename + len - 4, ".pdf") == 0))
5785   - {
5786   - QTC::TC("qpdf", "qpdf split-pages .pdf");
5787   - before = std::string(o.outfilename, len - 4) + "-";
5788   - after = o.outfilename + len - 4;
5789   - }
5790   - else
5791   - {
5792   - QTC::TC("qpdf", "qpdf split-pages other");
5793   - before = std::string(o.outfilename) + "-";
5794   - }
5795   -
5796   - if (should_remove_unreferenced_resources(pdf, o))
5797   - {
5798   - QPDFPageDocumentHelper dh(pdf);
5799   - dh.removeUnreferencedResources();
5800   - }
5801   - QPDFPageLabelDocumentHelper pldh(pdf);
5802   - QPDFAcroFormDocumentHelper afdh(pdf);
5803   - std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
5804   - size_t pageno_len = QUtil::uint_to_string(pages.size()).length();
5805   - size_t num_pages = pages.size();
5806   - for (size_t i = 0; i < num_pages; i += QIntC::to_size(o.split_pages))
5807   - {
5808   - size_t first = i + 1;
5809   - size_t last = i + QIntC::to_size(o.split_pages);
5810   - if (last > num_pages)
5811   - {
5812   - last = num_pages;
5813   - }
5814   - QPDF outpdf;
5815   - outpdf.emptyPDF();
5816   - PointerHolder<QPDFAcroFormDocumentHelper> out_afdh;
5817   - if (afdh.hasAcroForm())
5818   - {
5819   - out_afdh = new QPDFAcroFormDocumentHelper(outpdf);
5820   - }
5821   - if (o.suppress_warnings)
5822   - {
5823   - outpdf.setSuppressWarnings(true);
5824   - }
5825   - for (size_t pageno = first; pageno <= last; ++pageno)
5826   - {
5827   - QPDFObjectHandle page = pages.at(pageno - 1);
5828   - outpdf.addPage(page, false);
5829   - auto new_page = added_page(outpdf, page);
5830   - if (out_afdh.getPointer())
5831   - {
5832   - QTC::TC("qpdf", "qpdf copy form fields in split_pages");
5833   - try
5834   - {
5835   - out_afdh->fixCopiedAnnotations(new_page, page, afdh);
5836   - }
5837   - catch (std::exception& e)
5838   - {
5839   - pdf.warn(
5840   - QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(),
5841   - "", 0, "Exception caught while fixing copied"
5842   - " annotations. This may be a qpdf bug." +
5843   - std::string("Exception: ") + e.what()));
5844   - }
5845   - }
5846   - }
5847   - if (pldh.hasPageLabels())
5848   - {
5849   - std::vector<QPDFObjectHandle> labels;
5850   - pldh.getLabelsForPageRange(
5851   - QIntC::to_longlong(first - 1),
5852   - QIntC::to_longlong(last - 1),
5853   - 0, labels);
5854   - QPDFObjectHandle page_labels =
5855   - QPDFObjectHandle::newDictionary();
5856   - page_labels.replaceKey(
5857   - "/Nums", QPDFObjectHandle::newArray(labels));
5858   - outpdf.getRoot().replaceKey("/PageLabels", page_labels);
5859   - }
5860   - std::string page_range =
5861   - QUtil::uint_to_string(first, QIntC::to_int(pageno_len));
5862   - if (o.split_pages > 1)
5863   - {
5864   - page_range += "-" +
5865   - QUtil::uint_to_string(last, QIntC::to_int(pageno_len));
5866   - }
5867   - std::string outfile = before + page_range + after;
5868   - if (QUtil::same_file(o.infilename, outfile.c_str()))
5869   - {
5870   - std::cerr << whoami
5871   - << ": split pages would overwrite input file with "
5872   - << outfile << std::endl;
5873   - exit(EXIT_ERROR);
5874   - }
5875   - QPDFWriter w(outpdf, outfile.c_str());
5876   - set_writer_options(outpdf, o, w);
5877   - w.write();
5878   - if (o.verbose)
5879   - {
5880   - std::cout << whoami << ": wrote file " << outfile << std::endl;
5881   - }
5882   - if (outpdf.anyWarnings())
5883   - {
5884   - warnings = true;
5885   - }
5886   - }
5887   -}
5888   -
5889   -static void write_outfile(QPDF& pdf, Options& o)
5890   -{
5891   - std::string temp_out;
5892   - if (o.replace_input)
5893   - {
5894   - // Append but don't prepend to the path to generate a
5895   - // temporary name. This saves us from having to split the path
5896   - // by directory and non-directory.
5897   - temp_out = std::string(o.infilename) + ".~qpdf-temp#";
5898   - // o.outfilename will be restored to 0 before temp_out
5899   - // goes out of scope.
5900   - o.outfilename = temp_out.c_str();
5901   - }
5902   - else if (strcmp(o.outfilename, "-") == 0)
5903   - {
5904   - o.outfilename = 0;
5905   - }
5906   - {
5907   - // Private scope so QPDFWriter will close the output file
5908   - QPDFWriter w(pdf, o.outfilename);
5909   - set_writer_options(pdf, o, w);
5910   - w.write();
5911   - }
5912   - if (o.verbose && o.outfilename)
5913   - {
5914   - std::cout << whoami << ": wrote file "
5915   - << o.outfilename << std::endl;
5916   - }
5917   - if (o.replace_input)
5918   - {
5919   - o.outfilename = 0;
5920   - }
5921   - if (o.replace_input)
5922   - {
5923   - // We must close the input before we can rename files
5924   - pdf.closeInputSource();
5925   - std::string backup = std::string(o.infilename) + ".~qpdf-orig";
5926   - bool warnings = pdf.anyWarnings();
5927   - if (! warnings)
5928   - {
5929   - backup.append(1, '#');
5930   - }
5931   - QUtil::rename_file(o.infilename, backup.c_str());
5932   - QUtil::rename_file(temp_out.c_str(), o.infilename);
5933   - if (warnings)
5934   - {
5935   - std::cerr << whoami
5936   - << ": there are warnings; original file kept in "
5937   - << backup << std::endl;
5938   - }
5939   - else
5940   - {
5941   - try
5942   - {
5943   - QUtil::remove_file(backup.c_str());
5944   - }
5945   - catch (QPDFSystemError& e)
5946   - {
5947   - std::cerr
5948   - << whoami
5949   - << ": unable to delete original file ("
5950   - << e.what() << ");"
5951   - << " original file left in " << backup
5952   - << ", but the input was successfully replaced"
5953   - << std::endl;
5954   - }
5955   - }
5956   - }
5957   -}
5958   -
5959   -int realmain(int argc, char* argv[])
5960   -{
5961   - whoami = QUtil::getWhoami(argv[0]);
5962   - QUtil::setLineBuf(stdout);
5963   -
5964   - // Remove prefix added by libtool for consistency during testing.
5965   - if (strncmp(whoami, "lt-", 3) == 0)
5966   - {
5967   - whoami += 3;
5968   - }
5969   -
5970   - // ArgParser must stay in scope for the duration of qpdf's run as
5971   - // it holds dynamic memory used for argv.
5972   - Options o;
5973   - ArgParser ap(argc, argv, o);
5974   -
5975   - int exit_code = 0;
5976   - try
5977   - {
5978   - ap.parseOptions();
5979   - PointerHolder<QPDF> pdf_ph;
5980   - try
5981   - {
5982   - pdf_ph = process_file(o.infilename, o.password, o);
5983   - }
5984   - catch (QPDFExc& e)
5985   - {
5986   - if ((e.getErrorCode() == qpdf_e_password) &&
5987   - (o.check_is_encrypted || o.check_requires_password))
5988   - {
5989   - // Allow --is-encrypted and --requires-password to
5990   - // work when an incorrect password is supplied.
5991   - return 0;
5992   - }
5993   - throw e;
5994   - }
5995   - QPDF& pdf = *pdf_ph;
5996   - if (o.check_is_encrypted)
5997   - {
5998   - if (pdf.isEncrypted())
5999   - {
6000   - return 0;
6001   - }
6002   - else
6003   - {
6004   - return EXIT_IS_NOT_ENCRYPTED;
6005   - }
6006   - }
6007   - else if (o.check_requires_password)
6008   - {
6009   - if (pdf.isEncrypted())
6010   - {
6011   - return EXIT_CORRECT_PASSWORD;
6012   - }
6013   - else
6014   - {
6015   - return EXIT_IS_NOT_ENCRYPTED;
6016   - }
6017   - }
6018   - bool other_warnings = false;
6019   - std::vector<PointerHolder<QPDF>> page_heap;
6020   - if (! o.page_specs.empty())
6021   - {
6022   - handle_page_specs(pdf, o, other_warnings, page_heap);
6023   - }
6024   - if (! o.rotations.empty())
6025   - {
6026   - handle_rotations(pdf, o);
6027   - }
6028   - handle_under_overlay(pdf, o);
6029   - handle_transformations(pdf, o, exit_code);
6030   -
6031   - if ((o.outfilename == 0) && (! o.replace_input))
6032   - {
6033   - do_inspection(pdf, o);
6034   - }
6035   - else if (o.split_pages)
6036   - {
6037   - do_split_pages(pdf, o, other_warnings);
6038   - }
6039   - else
6040   - {
6041   - write_outfile(pdf, o);
6042   - }
6043   - if ((! pdf.getWarnings().empty()) || other_warnings)
6044   - {
6045   - if (! o.suppress_warnings)
6046   - {
6047   - std::cerr << whoami << ": operation succeeded with warnings;"
6048   - << " resulting file may have some problems"
6049   - << std::endl;
6050   - }
6051   - // Still return with warning code even if warnings were suppressed.
6052   - if (exit_code == 0)
6053   - {
6054   - exit_code = EXIT_WARNING;
6055   - }
6056   - }
6057   - }
6058   - catch (std::exception& e)
6059   - {
6060   - std::cerr << whoami << ": " << e.what() << std::endl;
6061   - return EXIT_ERROR;
6062 2484 }
6063 2485  
6064   - return exit_code;
  2486 + return (errors ? EXIT_ERROR :
  2487 + warnings ? EXIT_WARNING :
  2488 + 0);
6065 2489 }
6066 2490  
6067 2491 #ifdef WINDOWS_WMAIN
... ...
qpdf/qpdf.testcov
... ... @@ -626,3 +626,8 @@ qpdf-c called qpdf_oh_get_binary_string_value 0
626 626 qpdf-c called qpdf_oh_new_binary_string 0
627 627 qpdf duplicated pages password 0
628 628 qpdf misplaced pages password 0
  629 +qpdf check encrypted encrypted 0
  630 +qpdf check encrypted not encrypted 0
  631 +qpdf check password password incorrect 0
  632 +qpdf check password password correct 0
  633 +qpdf check password not encrypted 0
... ...
qpdf/qtest/qpdf/add-attachments-duplicate.out
1   -qpdf: a.pdf already has an attachment with key = auto-1; use --replace to replace or --key to specify a different key
2   -qpdf: wrote file b.pdf
  1 +qpdf: a.pdf already has attachments with the following keys: auto-1; use --replace to replace or --key to specify a different key
... ...
qpdf/qtest/qpdf/copy-attachments-1.out
1 1 qpdf: copying attachments from a.pdf
2 2 auto-1 -> auto-1
3   -qpdf: copying attachments from a.pdf
4 3 auto-3 -> auto-3
5   -qpdf: copying attachments from a.pdf
6 4 auto-Two -> auto-Two
7 5 qpdf: wrote file b.pdf
... ...
qpdf/qtest/qpdf/copy-attachments-2.out
1 1 qpdf: copying attachments from b.pdf
2 2 auto-1 -> 1-auto-1
3   -qpdf: copying attachments from b.pdf
4 3 auto-3 -> 1-auto-3
5   -qpdf: copying attachments from b.pdf
6 4 auto-Two -> 1-auto-Two
7 5 qpdf: wrote file c.pdf
... ...
qpdf/qtest/qpdf/copy-attachments-duplicate.out
1 1 qpdf: copying attachments from b.pdf
2   -qpdfb.pdf and a.pdf both have attachments with key auto-1; use --prefix with --copy-attachments-from or manually copy individual attachments
3   -qpdf: copying attachments from b.pdf
4   -qpdfb.pdf and a.pdf both have attachments with key auto-3; use --prefix with --copy-attachments-from or manually copy individual attachments
5   -qpdf: copying attachments from b.pdf
6   -qpdfb.pdf and a.pdf both have attachments with key auto-Two; use --prefix with --copy-attachments-from or manually copy individual attachments
7   -qpdf: wrote file c.pdf
  2 +qpdf: a.pdf already has attachments with keys that conflict with attachments from other files: file: b.pdf, key: auto-1; file: b.pdf, key: auto-3; file: b.pdf, key: auto-Two. Use --prefix with --copy-attachments-from or manually copy individual attachments.
... ...