Commit a51a139615ef59e79d586f21bae0098d6025e7a4

Authored by m-holger
1 parent 2c211a51

Split QPDF.cc into QPDF.cc and QPDF_objects.cc

Move methods responsible for loading or keeping track of objects to
QPDF_objects.cc.

The split was part of the reverted #1297. Reintroducing it now makes it
easier/safer to keep the work to refactor the xref and object tables in
sync with main.
libqpdf/CMakeLists.txt
@@ -54,6 +54,7 @@ set(libqpdf_SOURCES @@ -54,6 +54,7 @@ set(libqpdf_SOURCES
54 Pl_StdioFile.cc 54 Pl_StdioFile.cc
55 Pl_String.cc 55 Pl_String.cc
56 Pl_TIFFPredictor.cc 56 Pl_TIFFPredictor.cc
  57 + QPDF.cc
57 QPDFAcroFormDocumentHelper.cc 58 QPDFAcroFormDocumentHelper.cc
58 QPDFAnnotationObjectHelper.cc 59 QPDFAnnotationObjectHelper.cc
59 QPDFArgParser.cc 60 QPDFArgParser.cc
libqpdf/QPDF.cc 0 → 100644
  1 +#include <qpdf/qpdf-config.h> // include first for large file support
  2 +
  3 +#include <qpdf/QPDF_private.hh>
  4 +
  5 +#include <array>
  6 +#include <atomic>
  7 +#include <cstring>
  8 +#include <limits>
  9 +#include <map>
  10 +#include <regex>
  11 +#include <sstream>
  12 +#include <vector>
  13 +
  14 +#include <qpdf/BufferInputSource.hh>
  15 +#include <qpdf/FileInputSource.hh>
  16 +#include <qpdf/InputSource_private.hh>
  17 +#include <qpdf/OffsetInputSource.hh>
  18 +#include <qpdf/Pipeline.hh>
  19 +#include <qpdf/QPDFExc.hh>
  20 +#include <qpdf/QPDFLogger.hh>
  21 +#include <qpdf/QPDFObjectHandle_private.hh>
  22 +#include <qpdf/QPDFObject_private.hh>
  23 +#include <qpdf/QPDFParser.hh>
  24 +#include <qpdf/QTC.hh>
  25 +#include <qpdf/QUtil.hh>
  26 +#include <qpdf/Util.hh>
  27 +
  28 +using namespace qpdf;
  29 +using namespace std::literals;
  30 +
  31 +// This must be a fixed value. This API returns a const reference to it, and the C API relies on its
  32 +// being static as well.
  33 +std::string const QPDF::qpdf_version(QPDF_VERSION);
  34 +
  35 +static char const* EMPTY_PDF = (
  36 + // force line break
  37 + "%PDF-1.3\n"
  38 + "1 0 obj\n"
  39 + "<< /Type /Catalog /Pages 2 0 R >>\n"
  40 + "endobj\n"
  41 + "2 0 obj\n"
  42 + "<< /Type /Pages /Kids [] /Count 0 >>\n"
  43 + "endobj\n"
  44 + "xref\n"
  45 + "0 3\n"
  46 + "0000000000 65535 f \n"
  47 + "0000000009 00000 n \n"
  48 + "0000000058 00000 n \n"
  49 + "trailer << /Size 3 /Root 1 0 R >>\n"
  50 + "startxref\n"
  51 + "110\n"
  52 + "%%EOF\n");
  53 +
  54 +namespace
  55 +{
  56 + class InvalidInputSource: public InputSource
  57 + {
  58 + public:
  59 + ~InvalidInputSource() override = default;
  60 + qpdf_offset_t
  61 + findAndSkipNextEOL() override
  62 + {
  63 + throwException();
  64 + return 0;
  65 + }
  66 + std::string const&
  67 + getName() const override
  68 + {
  69 + static std::string name("closed input source");
  70 + return name;
  71 + }
  72 + qpdf_offset_t
  73 + tell() override
  74 + {
  75 + throwException();
  76 + return 0;
  77 + }
  78 + void
  79 + seek(qpdf_offset_t offset, int whence) override
  80 + {
  81 + throwException();
  82 + }
  83 + void
  84 + rewind() override
  85 + {
  86 + throwException();
  87 + }
  88 + size_t
  89 + read(char* buffer, size_t length) override
  90 + {
  91 + throwException();
  92 + return 0;
  93 + }
  94 + void
  95 + unreadCh(char ch) override
  96 + {
  97 + throwException();
  98 + }
  99 +
  100 + private:
  101 + void
  102 + throwException()
  103 + {
  104 + throw std::logic_error(
  105 + "QPDF operation attempted on a QPDF object with no input "
  106 + "source. QPDF operations are invalid before processFile (or "
  107 + "another process method) or after closeInputSource");
  108 + }
  109 + };
  110 +} // namespace
  111 +
  112 +QPDF::ForeignStreamData::ForeignStreamData(
  113 + std::shared_ptr<EncryptionParameters> encp,
  114 + std::shared_ptr<InputSource> file,
  115 + QPDFObjGen foreign_og,
  116 + qpdf_offset_t offset,
  117 + size_t length,
  118 + QPDFObjectHandle local_dict) :
  119 + encp(encp),
  120 + file(file),
  121 + foreign_og(foreign_og),
  122 + offset(offset),
  123 + length(length),
  124 + local_dict(local_dict)
  125 +{
  126 +}
  127 +
  128 +QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) :
  129 + QPDFObjectHandle::StreamDataProvider(true),
  130 + destination_qpdf(destination_qpdf)
  131 +{
  132 +}
  133 +
  134 +bool
  135 +QPDF::CopiedStreamDataProvider::provideStreamData(
  136 + QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry)
  137 +{
  138 + std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og];
  139 + bool result = false;
  140 + if (foreign_data.get()) {
  141 + result = destination_qpdf.pipeForeignStreamData(
  142 + foreign_data, pipeline, suppress_warnings, will_retry);
  143 + QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1);
  144 + } else {
  145 + auto foreign_stream = foreign_streams[og];
  146 + result = foreign_stream.pipeStreamData(
  147 + pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry);
  148 + QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1);
  149 + }
  150 + return result;
  151 +}
  152 +
  153 +void
  154 +QPDF::CopiedStreamDataProvider::registerForeignStream(
  155 + QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream)
  156 +{
  157 + this->foreign_streams[local_og] = foreign_stream;
  158 +}
  159 +
  160 +void
  161 +QPDF::CopiedStreamDataProvider::registerForeignStream(
  162 + QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream)
  163 +{
  164 + this->foreign_stream_data[local_og] = foreign_stream;
  165 +}
  166 +
  167 +QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) :
  168 + qpdf(qpdf),
  169 + og(og)
  170 +{
  171 +}
  172 +
  173 +std::string const&
  174 +QPDF::QPDFVersion()
  175 +{
  176 + // The C API relies on this being a static value.
  177 + return QPDF::qpdf_version;
  178 +}
  179 +
  180 +QPDF::Members::Members() :
  181 + log(QPDFLogger::defaultLogger()),
  182 + file(new InvalidInputSource()),
  183 + encp(new EncryptionParameters)
  184 +{
  185 +}
  186 +
  187 +QPDF::QPDF() :
  188 + m(std::make_unique<Members>())
  189 +{
  190 + m->tokenizer.allowEOF();
  191 + // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout
  192 + // the lifetime of this running application.
  193 + static std::atomic<unsigned long long> unique_id{0};
  194 + m->unique_id = unique_id.fetch_add(1ULL);
  195 +}
  196 +
  197 +// Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache
  198 +// (future Objects::Entry) to centralize all QPDF access to QPDFObject.
  199 +class Disconnect: BaseHandle
  200 +{
  201 + public:
  202 + Disconnect(std::shared_ptr<QPDFObject> const& obj) :
  203 + BaseHandle(obj)
  204 + {
  205 + }
  206 + void
  207 + disconnect()
  208 + {
  209 + BaseHandle::disconnect(false);
  210 + if (raw_type_code() != ::ot_null) {
  211 + obj->value = QPDF_Destroyed();
  212 + }
  213 + }
  214 +};
  215 +
  216 +QPDF::~QPDF()
  217 +{
  218 + // If two objects are mutually referential (through each object having an array or dictionary
  219 + // that contains an indirect reference to the other), the circular references in the
  220 + // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects
  221 + // in the object cache, which is those objects that we read from the file, and break all
  222 + // resolved indirect references by replacing them with an internal object type representing that
  223 + // they have been destroyed. Note that we can't break references like this at any time when the
  224 + // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that
  225 + // are reachable from this object to release their association with this QPDF. Direct objects
  226 + // are not destroyed since they can be moved to other QPDF objects safely.
  227 +
  228 + // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear
  229 + // the xref table anyway just to prevent any possibility of resolve() succeeding.
  230 + m->xref_table.clear();
  231 + for (auto const& iter: m->obj_cache) {
  232 + Disconnect(iter.second.object).disconnect();
  233 + }
  234 +}
  235 +
  236 +std::shared_ptr<QPDF>
  237 +QPDF::create()
  238 +{
  239 + return std::make_shared<QPDF>();
  240 +}
  241 +
  242 +void
  243 +QPDF::processFile(char const* filename, char const* password)
  244 +{
  245 + auto* fi = new FileInputSource(filename);
  246 + processInputSource(std::shared_ptr<InputSource>(fi), password);
  247 +}
  248 +
  249 +void
  250 +QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password)
  251 +{
  252 + auto* fi = new FileInputSource(description, filep, close_file);
  253 + processInputSource(std::shared_ptr<InputSource>(fi), password);
  254 +}
  255 +
  256 +void
  257 +QPDF::processMemoryFile(
  258 + char const* description, char const* buf, size_t length, char const* password)
  259 +{
  260 + processInputSource(
  261 + std::shared_ptr<InputSource>(
  262 + // line-break
  263 + new BufferInputSource(
  264 + description, new Buffer(QUtil::unsigned_char_pointer(buf), length), true)),
  265 + password);
  266 +}
  267 +
  268 +void
  269 +QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password)
  270 +{
  271 + m->file = source;
  272 + parse(password);
  273 +}
  274 +
  275 +void
  276 +QPDF::closeInputSource()
  277 +{
  278 + m->file = std::shared_ptr<InputSource>(new InvalidInputSource());
  279 +}
  280 +
  281 +void
  282 +QPDF::setPasswordIsHexKey(bool val)
  283 +{
  284 + m->provided_password_is_hex_key = val;
  285 +}
  286 +
  287 +void
  288 +QPDF::emptyPDF()
  289 +{
  290 + processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF));
  291 +}
  292 +
  293 +void
  294 +QPDF::registerStreamFilter(
  295 + std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
  296 +{
  297 + qpdf::Stream::registerStreamFilter(filter_name, factory);
  298 +}
  299 +
  300 +void
  301 +QPDF::setIgnoreXRefStreams(bool val)
  302 +{
  303 + m->ignore_xref_streams = val;
  304 +}
  305 +
  306 +std::shared_ptr<QPDFLogger>
  307 +QPDF::getLogger()
  308 +{
  309 + return m->log;
  310 +}
  311 +
  312 +void
  313 +QPDF::setLogger(std::shared_ptr<QPDFLogger> l)
  314 +{
  315 + m->log = l;
  316 +}
  317 +
  318 +void
  319 +QPDF::setOutputStreams(std::ostream* out, std::ostream* err)
  320 +{
  321 + setLogger(QPDFLogger::create());
  322 + m->log->setOutputStreams(out, err);
  323 +}
  324 +
  325 +void
  326 +QPDF::setSuppressWarnings(bool val)
  327 +{
  328 + m->suppress_warnings = val;
  329 +}
  330 +
  331 +void
  332 +QPDF::setMaxWarnings(size_t val)
  333 +{
  334 + m->max_warnings = val;
  335 +}
  336 +
  337 +void
  338 +QPDF::setAttemptRecovery(bool val)
  339 +{
  340 + m->attempt_recovery = val;
  341 +}
  342 +
  343 +void
  344 +QPDF::setImmediateCopyFrom(bool val)
  345 +{
  346 + m->immediate_copy_from = val;
  347 +}
  348 +
  349 +std::vector<QPDFExc>
  350 +QPDF::getWarnings()
  351 +{
  352 + std::vector<QPDFExc> result = m->warnings;
  353 + m->warnings.clear();
  354 + return result;
  355 +}
  356 +
  357 +bool
  358 +QPDF::anyWarnings() const
  359 +{
  360 + return !m->warnings.empty();
  361 +}
  362 +
  363 +size_t
  364 +QPDF::numWarnings() const
  365 +{
  366 + return m->warnings.size();
  367 +}
  368 +
  369 +bool
  370 +QPDF::validatePDFVersion(char const*& p, std::string& version)
  371 +{
  372 + bool valid = util::is_digit(*p);
  373 + if (valid) {
  374 + while (util::is_digit(*p)) {
  375 + version.append(1, *p++);
  376 + }
  377 + if ((*p == '.') && util::is_digit(*(p + 1))) {
  378 + version.append(1, *p++);
  379 + while (util::is_digit(*p)) {
  380 + version.append(1, *p++);
  381 + }
  382 + } else {
  383 + valid = false;
  384 + }
  385 + }
  386 + return valid;
  387 +}
  388 +
  389 +bool
  390 +QPDF::findHeader()
  391 +{
  392 + qpdf_offset_t global_offset = m->file->tell();
  393 + std::string line = m->file->readLine(1024);
  394 + char const* p = line.c_str();
  395 + if (strncmp(p, "%PDF-", 5) != 0) {
  396 + throw std::logic_error("findHeader is not looking at %PDF-");
  397 + }
  398 + p += 5;
  399 + std::string version;
  400 + // Note: The string returned by line.c_str() is always null-terminated. The code below never
  401 + // overruns the buffer because a null character always short-circuits further advancement.
  402 + bool valid = validatePDFVersion(p, version);
  403 + if (valid) {
  404 + m->pdf_version = version;
  405 + if (global_offset != 0) {
  406 + // Empirical evidence strongly suggests that when there is leading material prior to the
  407 + // PDF header, all explicit offsets in the file are such that 0 points to the beginning
  408 + // of the header.
  409 + QTC::TC("qpdf", "QPDF global offset");
  410 + m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset));
  411 + }
  412 + }
  413 + return valid;
  414 +}
  415 +
  416 +void
  417 +QPDF::warn(QPDFExc const& e)
  418 +{
  419 + if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) {
  420 + stopOnError("Too many warnings - file is too badly damaged");
  421 + }
  422 + m->warnings.push_back(e);
  423 + if (!m->suppress_warnings) {
  424 + *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";
  425 + }
  426 +}
  427 +
  428 +void
  429 +QPDF::warn(
  430 + qpdf_error_code_e error_code,
  431 + std::string const& object,
  432 + qpdf_offset_t offset,
  433 + std::string const& message)
  434 +{
  435 + warn(QPDFExc(error_code, getFilename(), object, offset, message));
  436 +}
  437 +
  438 +QPDFObjectHandle
  439 +QPDF::newReserved()
  440 +{
  441 + return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>());
  442 +}
  443 +
  444 +QPDFObjectHandle
  445 +QPDF::newIndirectNull()
  446 +{
  447 + return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>());
  448 +}
  449 +
  450 +QPDFObjectHandle
  451 +QPDF::newStream()
  452 +{
  453 + return makeIndirectObject(
  454 + qpdf::Stream(*this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0));
  455 +}
  456 +
  457 +QPDFObjectHandle
  458 +QPDF::newStream(std::shared_ptr<Buffer> data)
  459 +{
  460 + auto result = newStream();
  461 + result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
  462 + return result;
  463 +}
  464 +
  465 +QPDFObjectHandle
  466 +QPDF::newStream(std::string const& data)
  467 +{
  468 + auto result = newStream();
  469 + result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
  470 + return result;
  471 +}
  472 +
  473 +QPDFObjectHandle
  474 +QPDF::getObject(int objid, int generation)
  475 +{
  476 + return getObject(QPDFObjGen(objid, generation));
  477 +}
  478 +
  479 +QPDFObjectHandle
  480 +QPDF::getObjectByObjGen(QPDFObjGen og)
  481 +{
  482 + return getObject(og);
  483 +}
  484 +
  485 +QPDFObjectHandle
  486 +QPDF::getObjectByID(int objid, int generation)
  487 +{
  488 + return getObject(QPDFObjGen(objid, generation));
  489 +}
  490 +
  491 +QPDFObjectHandle
  492 +QPDF::copyForeignObject(QPDFObjectHandle foreign)
  493 +{
  494 + // Here's an explanation of what's going on here.
  495 + //
  496 + // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and
  497 + // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a
  498 + // foreign QPDF into the local QPDF, we have to replace all indirect object references with
  499 + // references to the corresponding object in the local file.
  500 + //
  501 + // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign
  502 + // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a
  503 + // mapping from the foreign ObjGen to the local QPDFObjectHandle.
  504 + //
  505 + // To copy, we do a deep traversal of the foreign object with loop detection to discover all
  506 + // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an
  507 + // indirect object, we check to see if we have already created a local copy of it. If not, we
  508 + // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the
  509 + // mapping from the foreign object ID to the new object. While we
  510 + // do this, we keep a list of objects to copy.
  511 + //
  512 + // Once we are done with the traversal, we copy all the objects that we need to copy. However,
  513 + // the copies will contain indirect object IDs that refer to objects in the foreign file. We
  514 + // need to replace them with references to objects in the local file. This is what
  515 + // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with
  516 + // all the indirect references replaced with new ones in the local context, we can replace the
  517 + // local reserved object with the copy. This mechanism allows us to copy objects with circular
  518 + // references in any order.
  519 +
  520 + // For streams, rather than copying the objects, we set up the stream data to pull from the
  521 + // original stream by using a stream data provider. This is done in a manner that doesn't
  522 + // require the original QPDF object but may require the original source of the stream data with
  523 + // special handling for immediate_copy_from. This logic is also in
  524 + // replaceForeignIndirectObjects.
  525 +
  526 + // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented
  527 + // use case to copy pages this way if the intention is to not update the pages tree.
  528 + if (!foreign.isIndirect()) {
  529 + QTC::TC("qpdf", "QPDF copyForeign direct");
  530 + throw std::logic_error("QPDF::copyForeign called with direct object handle");
  531 + }
  532 + QPDF& other = foreign.getQPDF();
  533 + if (&other == this) {
  534 + QTC::TC("qpdf", "QPDF copyForeign not foreign");
  535 + throw std::logic_error("QPDF::copyForeign called with object from this QPDF");
  536 + }
  537 +
  538 + ObjCopier& obj_copier = m->object_copiers[other.m->unique_id];
  539 + if (!obj_copier.visiting.empty()) {
  540 + throw std::logic_error(
  541 + "obj_copier.visiting is not empty at the beginning of copyForeignObject");
  542 + }
  543 +
  544 + // Make sure we have an object in this file for every referenced object in the old file.
  545 + // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we
  546 + // have to copy, the local object will be a reservation, unless it is a stream, in which case
  547 + // the local object will already be a stream.
  548 + reserveObjects(foreign, obj_copier, true);
  549 +
  550 + if (!obj_copier.visiting.empty()) {
  551 + throw std::logic_error("obj_copier.visiting is not empty after reserving objects");
  552 + }
  553 +
  554 + // Copy any new objects and replace the reservations.
  555 + for (auto& to_copy: obj_copier.to_copy) {
  556 + QPDFObjectHandle copy = replaceForeignIndirectObjects(to_copy, obj_copier, true);
  557 + if (!to_copy.isStream()) {
  558 + QPDFObjGen og(to_copy.getObjGen());
  559 + replaceReserved(obj_copier.object_map[og], copy);
  560 + }
  561 + }
  562 + obj_copier.to_copy.clear();
  563 +
  564 + auto og = foreign.getObjGen();
  565 + if (!obj_copier.object_map.count(og)) {
  566 + warn(damagedPDF(
  567 + other.getFilename() + " object " + og.unparse(' '),
  568 + foreign.getParsedOffset(),
  569 + "unexpected reference to /Pages object while copying foreign object; replacing with "
  570 + "null"));
  571 + return QPDFObjectHandle::newNull();
  572 + }
  573 + return obj_copier.object_map[foreign.getObjGen()];
  574 +}
  575 +
  576 +void
  577 +QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
  578 +{
  579 + auto foreign_tc = foreign.getTypeCode();
  580 + if (foreign_tc == ::ot_reserved) {
  581 + throw std::logic_error("QPDF: attempting to copy a foreign reserved object");
  582 + }
  583 +
  584 + if (foreign.isPagesObject()) {
  585 + QTC::TC("qpdf", "QPDF not copying pages object");
  586 + return;
  587 + }
  588 +
  589 + if (foreign.isIndirect()) {
  590 + QPDFObjGen foreign_og(foreign.getObjGen());
  591 + if (!obj_copier.visiting.add(foreign_og)) {
  592 + QTC::TC("qpdf", "QPDF loop reserving objects");
  593 + return;
  594 + }
  595 + if (obj_copier.object_map.count(foreign_og) > 0) {
  596 + QTC::TC("qpdf", "QPDF already reserved object");
  597 + if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) {
  598 + obj_copier.visiting.erase(foreign);
  599 + return;
  600 + }
  601 + } else {
  602 + QTC::TC("qpdf", "QPDF copy indirect");
  603 + obj_copier.object_map[foreign_og] =
  604 + foreign.isStream() ? newStream() : newIndirectNull();
  605 + if ((!top) && foreign.isPageObject()) {
  606 + QTC::TC("qpdf", "QPDF not crossing page boundary");
  607 + obj_copier.visiting.erase(foreign_og);
  608 + return;
  609 + }
  610 + }
  611 + obj_copier.to_copy.push_back(foreign);
  612 + }
  613 +
  614 + if (foreign_tc == ::ot_array) {
  615 + QTC::TC("qpdf", "QPDF reserve array");
  616 + for (auto const& item: foreign.as_array()) {
  617 + reserveObjects(item, obj_copier, false);
  618 + }
  619 + } else if (foreign_tc == ::ot_dictionary) {
  620 + QTC::TC("qpdf", "QPDF reserve dictionary");
  621 + for (auto const& item: foreign.as_dictionary()) {
  622 + if (!item.second.null()) {
  623 + reserveObjects(item.second, obj_copier, false);
  624 + }
  625 + }
  626 + } else if (foreign_tc == ::ot_stream) {
  627 + QTC::TC("qpdf", "QPDF reserve stream");
  628 + reserveObjects(foreign.getDict(), obj_copier, false);
  629 + }
  630 +
  631 + obj_copier.visiting.erase(foreign);
  632 +}
  633 +
  634 +QPDFObjectHandle
  635 +QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
  636 +{
  637 + auto foreign_tc = foreign.getTypeCode();
  638 + QPDFObjectHandle result;
  639 + if ((!top) && foreign.isIndirect()) {
  640 + QTC::TC("qpdf", "QPDF replace indirect");
  641 + auto mapping = obj_copier.object_map.find(foreign.getObjGen());
  642 + if (mapping == obj_copier.object_map.end()) {
  643 + // This case would occur if this is a reference to a Pages object that we didn't
  644 + // traverse into.
  645 + QTC::TC("qpdf", "QPDF replace foreign indirect with null");
  646 + result = QPDFObjectHandle::newNull();
  647 + } else {
  648 + result = mapping->second;
  649 + }
  650 + } else if (foreign_tc == ::ot_array) {
  651 + QTC::TC("qpdf", "QPDF replace array");
  652 + result = QPDFObjectHandle::newArray();
  653 + for (auto const& item: foreign.as_array()) {
  654 + result.appendItem(replaceForeignIndirectObjects(item, obj_copier, false));
  655 + }
  656 + } else if (foreign_tc == ::ot_dictionary) {
  657 + QTC::TC("qpdf", "QPDF replace dictionary");
  658 + result = QPDFObjectHandle::newDictionary();
  659 + for (auto const& [key, value]: foreign.as_dictionary()) {
  660 + if (!value.null()) {
  661 + result.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false));
  662 + }
  663 + }
  664 + } else if (foreign_tc == ::ot_stream) {
  665 + QTC::TC("qpdf", "QPDF replace stream");
  666 + result = obj_copier.object_map[foreign.getObjGen()];
  667 + QPDFObjectHandle dict = result.getDict();
  668 + QPDFObjectHandle old_dict = foreign.getDict();
  669 + for (auto const& [key, value]: old_dict.as_dictionary()) {
  670 + if (!value.null()) {
  671 + dict.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false));
  672 + }
  673 + }
  674 + copyStreamData(result, foreign);
  675 + } else {
  676 + foreign.assertScalar();
  677 + result = foreign;
  678 + result.makeDirect();
  679 + }
  680 +
  681 + if (top && (!result.isStream()) && result.isIndirect()) {
  682 + throw std::logic_error("replacement for foreign object is indirect");
  683 + }
  684 +
  685 + return result;
  686 +}
  687 +
  688 +void
  689 +QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
  690 +{
  691 + // This method was originally written for copying foreign streams, but it is used by
  692 + // QPDFObjectHandle to copy streams from the same QPDF object as well.
  693 +
  694 + QPDFObjectHandle dict = result.getDict();
  695 + QPDFObjectHandle old_dict = foreign.getDict();
  696 + if (m->copied_stream_data_provider == nullptr) {
  697 + m->copied_stream_data_provider = new CopiedStreamDataProvider(*this);
  698 + m->copied_streams =
  699 + std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);
  700 + }
  701 + QPDFObjGen local_og(result.getObjGen());
  702 + // Copy information from the foreign stream so we can pipe its data later without keeping the
  703 + // original QPDF object around.
  704 +
  705 + QPDF& foreign_stream_qpdf =
  706 + foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");
  707 +
  708 + auto stream = foreign.as_stream();
  709 + if (!stream) {
  710 + throw std::logic_error("unable to retrieve underlying stream object from foreign stream");
  711 + }
  712 + std::shared_ptr<Buffer> stream_buffer = stream.getStreamDataBuffer();
  713 + if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {
  714 + // Pull the stream data into a buffer before attempting the copy operation. Do it on the
  715 + // source stream so that if the source stream is copied multiple times, we don't have to
  716 + // keep duplicating the memory.
  717 + QTC::TC("qpdf", "QPDF immediate copy stream data");
  718 + foreign.replaceStreamData(
  719 + foreign.getRawStreamData(),
  720 + old_dict.getKey("/Filter"),
  721 + old_dict.getKey("/DecodeParms"));
  722 + stream_buffer = stream.getStreamDataBuffer();
  723 + }
  724 + std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider =
  725 + stream.getStreamDataProvider();
  726 + if (stream_buffer.get()) {
  727 + QTC::TC("qpdf", "QPDF copy foreign stream with buffer");
  728 + result.replaceStreamData(
  729 + stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
  730 + } else if (stream_provider.get()) {
  731 + // In this case, the remote stream's QPDF must stay in scope.
  732 + QTC::TC("qpdf", "QPDF copy foreign stream with provider");
  733 + m->copied_stream_data_provider->registerForeignStream(local_og, foreign);
  734 + result.replaceStreamData(
  735 + m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
  736 + } else {
  737 + auto foreign_stream_data = std::make_shared<ForeignStreamData>(
  738 + foreign_stream_qpdf.m->encp,
  739 + foreign_stream_qpdf.m->file,
  740 + foreign,
  741 + foreign.getParsedOffset(),
  742 + stream.getLength(),
  743 + dict);
  744 + m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data);
  745 + result.replaceStreamData(
  746 + m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
  747 + }
  748 +}
  749 +
  750 +unsigned long long
  751 +QPDF::getUniqueId() const
  752 +{
  753 + return m->unique_id;
  754 +}
  755 +
  756 +std::string
  757 +QPDF::getFilename() const
  758 +{
  759 + return m->file->getName();
  760 +}
  761 +
  762 +PDFVersion
  763 +QPDF::getVersionAsPDFVersion()
  764 +{
  765 + int major = 1;
  766 + int minor = 3;
  767 + int extension_level = getExtensionLevel();
  768 +
  769 + std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)");
  770 + std::smatch match;
  771 + if (std::regex_search(m->pdf_version, match, v)) {
  772 + major = QUtil::string_to_int(match[1].str().c_str());
  773 + minor = QUtil::string_to_int(match[2].str().c_str());
  774 + }
  775 +
  776 + return {major, minor, extension_level};
  777 +}
  778 +
  779 +std::string
  780 +QPDF::getPDFVersion() const
  781 +{
  782 + return m->pdf_version;
  783 +}
  784 +
  785 +int
  786 +QPDF::getExtensionLevel()
  787 +{
  788 + int result = 0;
  789 + QPDFObjectHandle obj = getRoot();
  790 + if (obj.hasKey("/Extensions")) {
  791 + obj = obj.getKey("/Extensions");
  792 + if (obj.isDictionary() && obj.hasKey("/ADBE")) {
  793 + obj = obj.getKey("/ADBE");
  794 + if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) {
  795 + obj = obj.getKey("/ExtensionLevel");
  796 + if (obj.isInteger()) {
  797 + result = obj.getIntValueAsInt();
  798 + }
  799 + }
  800 + }
  801 + }
  802 + return result;
  803 +}
  804 +
  805 +QPDFObjectHandle
  806 +QPDF::getTrailer()
  807 +{
  808 + return m->trailer;
  809 +}
  810 +
  811 +QPDFObjectHandle
  812 +QPDF::getRoot()
  813 +{
  814 + QPDFObjectHandle root = m->trailer.getKey("/Root");
  815 + if (!root.isDictionary()) {
  816 + throw damagedPDF("", 0, "unable to find /Root dictionary");
  817 + } else if (
  818 + // Check_mode is an interim solution to request #810 pending a more comprehensive review of
  819 + // the approach to more extensive checks and warning levels.
  820 + m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
  821 + warn(damagedPDF("", 0, "catalog /Type entry missing or invalid"));
  822 + root.replaceKey("/Type", "/Catalog"_qpdf);
  823 + }
  824 + return root;
  825 +}
  826 +
  827 +std::map<QPDFObjGen, QPDFXRefEntry>
  828 +QPDF::getXRefTable()
  829 +{
  830 + return getXRefTableInternal();
  831 +}
  832 +
  833 +std::map<QPDFObjGen, QPDFXRefEntry> const&
  834 +QPDF::getXRefTableInternal()
  835 +{
  836 + if (!m->parsed) {
  837 + throw std::logic_error("QPDF::getXRefTable called before parsing.");
  838 + }
  839 +
  840 + return m->xref_table;
  841 +}
  842 +
  843 +bool
  844 +QPDF::pipeStreamData(
  845 + std::shared_ptr<EncryptionParameters> encp,
  846 + std::shared_ptr<InputSource> file,
  847 + QPDF& qpdf_for_warning,
  848 + QPDFObjGen og,
  849 + qpdf_offset_t offset,
  850 + size_t length,
  851 + QPDFObjectHandle stream_dict,
  852 + Pipeline* pipeline,
  853 + bool suppress_warnings,
  854 + bool will_retry)
  855 +{
  856 + std::unique_ptr<Pipeline> to_delete;
  857 + if (encp->encrypted) {
  858 + decryptStream(encp, file, qpdf_for_warning, pipeline, og, stream_dict, to_delete);
  859 + }
  860 +
  861 + bool attempted_finish = false;
  862 + try {
  863 + auto buf = file->read(length, offset);
  864 + if (buf.size() != length) {
  865 + throw damagedPDF(
  866 + *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data");
  867 + }
  868 + pipeline->write(buf.data(), length);
  869 + attempted_finish = true;
  870 + pipeline->finish();
  871 + return true;
  872 + } catch (QPDFExc& e) {
  873 + if (!suppress_warnings) {
  874 + qpdf_for_warning.warn(e);
  875 + }
  876 + } catch (std::exception& e) {
  877 + if (!suppress_warnings) {
  878 + QTC::TC("qpdf", "QPDF decoding error warning");
  879 + qpdf_for_warning.warn(
  880 + // line-break
  881 + damagedPDF(
  882 + *file,
  883 + "",
  884 + file->getLastOffset(),
  885 + ("error decoding stream data for object " + og.unparse(' ') + ": " +
  886 + e.what())));
  887 + if (will_retry) {
  888 + qpdf_for_warning.warn(
  889 + // line-break
  890 + damagedPDF(
  891 + *file,
  892 + "",
  893 + file->getLastOffset(),
  894 + "stream will be re-processed without filtering to avoid data loss"));
  895 + }
  896 + }
  897 + }
  898 + if (!attempted_finish) {
  899 + try {
  900 + pipeline->finish();
  901 + } catch (std::exception&) {
  902 + // ignore
  903 + }
  904 + }
  905 + return false;
  906 +}
  907 +
  908 +bool
  909 +QPDF::pipeStreamData(
  910 + QPDFObjGen og,
  911 + qpdf_offset_t offset,
  912 + size_t length,
  913 + QPDFObjectHandle stream_dict,
  914 + Pipeline* pipeline,
  915 + bool suppress_warnings,
  916 + bool will_retry)
  917 +{
  918 + return pipeStreamData(
  919 + m->encp,
  920 + m->file,
  921 + *this,
  922 + og,
  923 + offset,
  924 + length,
  925 + stream_dict,
  926 + pipeline,
  927 + suppress_warnings,
  928 + will_retry);
  929 +}
  930 +
  931 +bool
  932 +QPDF::pipeForeignStreamData(
  933 + std::shared_ptr<ForeignStreamData> foreign,
  934 + Pipeline* pipeline,
  935 + bool suppress_warnings,
  936 + bool will_retry)
  937 +{
  938 + if (foreign->encp->encrypted) {
  939 + QTC::TC("qpdf", "QPDF pipe foreign encrypted stream");
  940 + }
  941 + return pipeStreamData(
  942 + foreign->encp,
  943 + foreign->file,
  944 + *this,
  945 + foreign->foreign_og,
  946 + foreign->offset,
  947 + foreign->length,
  948 + foreign->local_dict,
  949 + pipeline,
  950 + suppress_warnings,
  951 + will_retry);
  952 +}
  953 +
  954 +// Throw a generic exception when we lack context for something more specific. New code should not
  955 +// use this. This method exists to improve somewhat from calling assert in very old code.
  956 +void
  957 +QPDF::stopOnError(std::string const& message)
  958 +{
  959 + throw damagedPDF("", message);
  960 +}
  961 +
  962 +// Return an exception of type qpdf_e_damaged_pdf.
  963 +QPDFExc
  964 +QPDF::damagedPDF(
  965 + InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message)
  966 +{
  967 + return {qpdf_e_damaged_pdf, input.getName(), object, offset, message};
  968 +}
  969 +
  970 +// Return an exception of type qpdf_e_damaged_pdf. The object is taken from
  971 +// m->last_object_description.
  972 +QPDFExc
  973 +QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message)
  974 +{
  975 + return damagedPDF(input, m->last_object_description, offset, message);
  976 +}
  977 +
  978 +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file.
  979 +QPDFExc
  980 +QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
  981 +{
  982 + return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message};
  983 +}
  984 +
  985 +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the
  986 +// offset from .m->file->getLastOffset().
  987 +QPDFExc
  988 +QPDF::damagedPDF(std::string const& object, std::string const& message)
  989 +{
  990 + return damagedPDF(object, m->file->getLastOffset(), message);
  991 +}
  992 +
  993 +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object
  994 +// from .m->last_object_description.
  995 +QPDFExc
  996 +QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)
  997 +{
  998 + return damagedPDF(m->last_object_description, offset, message);
  999 +}
  1000 +
  1001 +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object
  1002 +// from m->last_object_description and the offset from m->file->getLastOffset().
  1003 +QPDFExc
  1004 +QPDF::damagedPDF(std::string const& message)
  1005 +{
  1006 + return damagedPDF(m->last_object_description, m->file->getLastOffset(), message);
  1007 +}
  1008 +
  1009 +bool
  1010 +QPDF::everCalledGetAllPages() const
  1011 +{
  1012 + return m->ever_called_get_all_pages;
  1013 +}
  1014 +
  1015 +bool
  1016 +QPDF::everPushedInheritedAttributesToPages() const
  1017 +{
  1018 + return m->ever_pushed_inherited_attributes_to_pages;
  1019 +}
  1020 +
  1021 +void
  1022 +QPDF::removeSecurityRestrictions()
  1023 +{
  1024 + auto root = getRoot();
  1025 + root.removeKey("/Perms");
  1026 + auto acroform = root.getKey("/AcroForm");
  1027 + if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) {
  1028 + acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0));
  1029 + }
  1030 +}
libqpdf/QPDF_objects.cc
@@ -28,29 +28,6 @@ @@ -28,29 +28,6 @@
28 using namespace qpdf; 28 using namespace qpdf;
29 using namespace std::literals; 29 using namespace std::literals;
30 30
31 -// This must be a fixed value. This API returns a const reference to it, and the C API relies on its  
32 -// being static as well.  
33 -std::string const QPDF::qpdf_version(QPDF_VERSION);  
34 -  
35 -static char const* EMPTY_PDF = (  
36 - // force line break  
37 - "%PDF-1.3\n"  
38 - "1 0 obj\n"  
39 - "<< /Type /Catalog /Pages 2 0 R >>\n"  
40 - "endobj\n"  
41 - "2 0 obj\n"  
42 - "<< /Type /Pages /Kids [] /Count 0 >>\n"  
43 - "endobj\n"  
44 - "xref\n"  
45 - "0 3\n"  
46 - "0000000000 65535 f \n"  
47 - "0000000009 00000 n \n"  
48 - "0000000058 00000 n \n"  
49 - "trailer << /Size 3 /Root 1 0 R >>\n"  
50 - "startxref\n"  
51 - "110\n"  
52 - "%%EOF\n");  
53 -  
54 namespace 31 namespace
55 { 32 {
56 class InvalidInputSource: public InputSource 33 class InvalidInputSource: public InputSource
@@ -109,310 +86,6 @@ namespace @@ -109,310 +86,6 @@ namespace
109 }; 86 };
110 } // namespace 87 } // namespace
111 88
112 -QPDF::ForeignStreamData::ForeignStreamData(  
113 - std::shared_ptr<EncryptionParameters> encp,  
114 - std::shared_ptr<InputSource> file,  
115 - QPDFObjGen foreign_og,  
116 - qpdf_offset_t offset,  
117 - size_t length,  
118 - QPDFObjectHandle local_dict) :  
119 - encp(encp),  
120 - file(file),  
121 - foreign_og(foreign_og),  
122 - offset(offset),  
123 - length(length),  
124 - local_dict(local_dict)  
125 -{  
126 -}  
127 -  
128 -QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) :  
129 - QPDFObjectHandle::StreamDataProvider(true),  
130 - destination_qpdf(destination_qpdf)  
131 -{  
132 -}  
133 -  
134 -bool  
135 -QPDF::CopiedStreamDataProvider::provideStreamData(  
136 - QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry)  
137 -{  
138 - std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og];  
139 - bool result = false;  
140 - if (foreign_data.get()) {  
141 - result = destination_qpdf.pipeForeignStreamData(  
142 - foreign_data, pipeline, suppress_warnings, will_retry);  
143 - QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1);  
144 - } else {  
145 - auto foreign_stream = foreign_streams[og];  
146 - result = foreign_stream.pipeStreamData(  
147 - pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry);  
148 - QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1);  
149 - }  
150 - return result;  
151 -}  
152 -  
153 -void  
154 -QPDF::CopiedStreamDataProvider::registerForeignStream(  
155 - QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream)  
156 -{  
157 - this->foreign_streams[local_og] = foreign_stream;  
158 -}  
159 -  
160 -void  
161 -QPDF::CopiedStreamDataProvider::registerForeignStream(  
162 - QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream)  
163 -{  
164 - this->foreign_stream_data[local_og] = foreign_stream;  
165 -}  
166 -  
167 -QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) :  
168 - qpdf(qpdf),  
169 - og(og)  
170 -{  
171 -}  
172 -  
173 -std::string const&  
174 -QPDF::QPDFVersion()  
175 -{  
176 - // The C API relies on this being a static value.  
177 - return QPDF::qpdf_version;  
178 -}  
179 -  
180 -QPDF::Members::Members() :  
181 - log(QPDFLogger::defaultLogger()),  
182 - file(new InvalidInputSource()),  
183 - encp(new EncryptionParameters)  
184 -{  
185 -}  
186 -  
187 -QPDF::QPDF() :  
188 - m(std::make_unique<Members>())  
189 -{  
190 - m->tokenizer.allowEOF();  
191 - // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout  
192 - // the lifetime of this running application.  
193 - static std::atomic<unsigned long long> unique_id{0};  
194 - m->unique_id = unique_id.fetch_add(1ULL);  
195 -}  
196 -  
197 -// Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache  
198 -// (future Objects::Entry) to centralize all QPDF access to QPDFObject.  
199 -class Disconnect: BaseHandle  
200 -{  
201 - public:  
202 - Disconnect(std::shared_ptr<QPDFObject> const& obj) :  
203 - BaseHandle(obj)  
204 - {  
205 - }  
206 - void  
207 - disconnect()  
208 - {  
209 - BaseHandle::disconnect(false);  
210 - if (raw_type_code() != ::ot_null) {  
211 - obj->value = QPDF_Destroyed();  
212 - }  
213 - }  
214 -};  
215 -  
216 -QPDF::~QPDF()  
217 -{  
218 - // If two objects are mutually referential (through each object having an array or dictionary  
219 - // that contains an indirect reference to the other), the circular references in the  
220 - // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects  
221 - // in the object cache, which is those objects that we read from the file, and break all  
222 - // resolved indirect references by replacing them with an internal object type representing that  
223 - // they have been destroyed. Note that we can't break references like this at any time when the  
224 - // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that  
225 - // are reachable from this object to release their association with this QPDF. Direct objects  
226 - // are not destroyed since they can be moved to other QPDF objects safely.  
227 -  
228 - // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear  
229 - // the xref table anyway just to prevent any possibility of resolve() succeeding.  
230 - m->xref_table.clear();  
231 - for (auto const& iter: m->obj_cache) {  
232 - Disconnect(iter.second.object).disconnect();  
233 - }  
234 -}  
235 -  
236 -std::shared_ptr<QPDF>  
237 -QPDF::create()  
238 -{  
239 - return std::make_shared<QPDF>();  
240 -}  
241 -  
242 -void  
243 -QPDF::processFile(char const* filename, char const* password)  
244 -{  
245 - auto* fi = new FileInputSource(filename);  
246 - processInputSource(std::shared_ptr<InputSource>(fi), password);  
247 -}  
248 -  
249 -void  
250 -QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password)  
251 -{  
252 - auto* fi = new FileInputSource(description, filep, close_file);  
253 - processInputSource(std::shared_ptr<InputSource>(fi), password);  
254 -}  
255 -  
256 -void  
257 -QPDF::processMemoryFile(  
258 - char const* description, char const* buf, size_t length, char const* password)  
259 -{  
260 - processInputSource(  
261 - std::shared_ptr<InputSource>(  
262 - // line-break  
263 - new BufferInputSource(  
264 - description, new Buffer(QUtil::unsigned_char_pointer(buf), length), true)),  
265 - password);  
266 -}  
267 -  
268 -void  
269 -QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password)  
270 -{  
271 - m->file = source;  
272 - parse(password);  
273 -}  
274 -  
275 -void  
276 -QPDF::closeInputSource()  
277 -{  
278 - m->file = std::shared_ptr<InputSource>(new InvalidInputSource());  
279 -}  
280 -  
281 -void  
282 -QPDF::setPasswordIsHexKey(bool val)  
283 -{  
284 - m->provided_password_is_hex_key = val;  
285 -}  
286 -  
287 -void  
288 -QPDF::emptyPDF()  
289 -{  
290 - processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF));  
291 -}  
292 -  
293 -void  
294 -QPDF::registerStreamFilter(  
295 - std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)  
296 -{  
297 - qpdf::Stream::registerStreamFilter(filter_name, factory);  
298 -}  
299 -  
300 -void  
301 -QPDF::setIgnoreXRefStreams(bool val)  
302 -{  
303 - m->ignore_xref_streams = val;  
304 -}  
305 -  
306 -std::shared_ptr<QPDFLogger>  
307 -QPDF::getLogger()  
308 -{  
309 - return m->log;  
310 -}  
311 -  
312 -void  
313 -QPDF::setLogger(std::shared_ptr<QPDFLogger> l)  
314 -{  
315 - m->log = l;  
316 -}  
317 -  
318 -void  
319 -QPDF::setOutputStreams(std::ostream* out, std::ostream* err)  
320 -{  
321 - setLogger(QPDFLogger::create());  
322 - m->log->setOutputStreams(out, err);  
323 -}  
324 -  
325 -void  
326 -QPDF::setSuppressWarnings(bool val)  
327 -{  
328 - m->suppress_warnings = val;  
329 -}  
330 -  
331 -void  
332 -QPDF::setMaxWarnings(size_t val)  
333 -{  
334 - m->max_warnings = val;  
335 -}  
336 -  
337 -void  
338 -QPDF::setAttemptRecovery(bool val)  
339 -{  
340 - m->attempt_recovery = val;  
341 -}  
342 -  
343 -void  
344 -QPDF::setImmediateCopyFrom(bool val)  
345 -{  
346 - m->immediate_copy_from = val;  
347 -}  
348 -  
349 -std::vector<QPDFExc>  
350 -QPDF::getWarnings()  
351 -{  
352 - std::vector<QPDFExc> result = m->warnings;  
353 - m->warnings.clear();  
354 - return result;  
355 -}  
356 -  
357 -bool  
358 -QPDF::anyWarnings() const  
359 -{  
360 - return !m->warnings.empty();  
361 -}  
362 -  
363 -size_t  
364 -QPDF::numWarnings() const  
365 -{  
366 - return m->warnings.size();  
367 -}  
368 -  
369 -bool  
370 -QPDF::validatePDFVersion(char const*& p, std::string& version)  
371 -{  
372 - bool valid = util::is_digit(*p);  
373 - if (valid) {  
374 - while (util::is_digit(*p)) {  
375 - version.append(1, *p++);  
376 - }  
377 - if ((*p == '.') && util::is_digit(*(p + 1))) {  
378 - version.append(1, *p++);  
379 - while (util::is_digit(*p)) {  
380 - version.append(1, *p++);  
381 - }  
382 - } else {  
383 - valid = false;  
384 - }  
385 - }  
386 - return valid;  
387 -}  
388 -  
389 -bool  
390 -QPDF::findHeader()  
391 -{  
392 - qpdf_offset_t global_offset = m->file->tell();  
393 - std::string line = m->file->readLine(1024);  
394 - char const* p = line.c_str();  
395 - if (strncmp(p, "%PDF-", 5) != 0) {  
396 - throw std::logic_error("findHeader is not looking at %PDF-");  
397 - }  
398 - p += 5;  
399 - std::string version;  
400 - // Note: The string returned by line.c_str() is always null-terminated. The code below never  
401 - // overruns the buffer because a null character always short-circuits further advancement.  
402 - bool valid = validatePDFVersion(p, version);  
403 - if (valid) {  
404 - m->pdf_version = version;  
405 - if (global_offset != 0) {  
406 - // Empirical evidence strongly suggests that when there is leading material prior to the  
407 - // PDF header, all explicit offsets in the file are such that 0 points to the beginning  
408 - // of the header.  
409 - QTC::TC("qpdf", "QPDF global offset");  
410 - m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset));  
411 - }  
412 - }  
413 - return valid;  
414 -}  
415 -  
416 bool 89 bool
417 QPDF::findStartxref() 90 QPDF::findStartxref()
418 { 91 {
@@ -500,28 +173,6 @@ QPDF::inParse(bool v) @@ -500,28 +173,6 @@ QPDF::inParse(bool v)
500 } 173 }
501 174
502 void 175 void
503 -QPDF::warn(QPDFExc const& e)  
504 -{  
505 - if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) {  
506 - stopOnError("Too many warnings - file is too badly damaged");  
507 - }  
508 - m->warnings.push_back(e);  
509 - if (!m->suppress_warnings) {  
510 - *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";  
511 - }  
512 -}  
513 -  
514 -void  
515 -QPDF::warn(  
516 - qpdf_error_code_e error_code,  
517 - std::string const& object,  
518 - qpdf_offset_t offset,  
519 - std::string const& message)  
520 -{  
521 - warn(QPDFExc(error_code, getFilename(), object, offset, message));  
522 -}  
523 -  
524 -void  
525 QPDF::setTrailer(QPDFObjectHandle obj) 176 QPDF::setTrailer(QPDFObjectHandle obj)
526 { 177 {
527 if (m->trailer) { 178 if (m->trailer) {
@@ -2135,41 +1786,6 @@ QPDF::makeIndirectObject(QPDFObjectHandle oh) @@ -2135,41 +1786,6 @@ QPDF::makeIndirectObject(QPDFObjectHandle oh)
2135 return makeIndirectFromQPDFObject(oh.getObj()); 1786 return makeIndirectFromQPDFObject(oh.getObj());
2136 } 1787 }
2137 1788
2138 -QPDFObjectHandle  
2139 -QPDF::newReserved()  
2140 -{  
2141 - return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>());  
2142 -}  
2143 -  
2144 -QPDFObjectHandle  
2145 -QPDF::newIndirectNull()  
2146 -{  
2147 - return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>());  
2148 -}  
2149 -  
2150 -QPDFObjectHandle  
2151 -QPDF::newStream()  
2152 -{  
2153 - return makeIndirectObject(  
2154 - qpdf::Stream(*this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0));  
2155 -}  
2156 -  
2157 -QPDFObjectHandle  
2158 -QPDF::newStream(std::shared_ptr<Buffer> data)  
2159 -{  
2160 - auto result = newStream();  
2161 - result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());  
2162 - return result;  
2163 -}  
2164 -  
2165 -QPDFObjectHandle  
2166 -QPDF::newStream(std::string const& data)  
2167 -{  
2168 - auto result = newStream();  
2169 - result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());  
2170 - return result;  
2171 -}  
2172 -  
2173 std::shared_ptr<QPDFObject> 1789 std::shared_ptr<QPDFObject>
2174 QPDF::getObjectForParser(int id, int gen, bool parse_pdf) 1790 QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
2175 { 1791 {
@@ -2216,24 +1832,6 @@ QPDF::getObject(QPDFObjGen og) @@ -2216,24 +1832,6 @@ QPDF::getObject(QPDFObjGen og)
2216 } 1832 }
2217 } 1833 }
2218 1834
2219 -QPDFObjectHandle  
2220 -QPDF::getObject(int objid, int generation)  
2221 -{  
2222 - return getObject(QPDFObjGen(objid, generation));  
2223 -}  
2224 -  
2225 -QPDFObjectHandle  
2226 -QPDF::getObjectByObjGen(QPDFObjGen og)  
2227 -{  
2228 - return getObject(og);  
2229 -}  
2230 -  
2231 -QPDFObjectHandle  
2232 -QPDF::getObjectByID(int objid, int generation)  
2233 -{  
2234 - return getObject(QPDFObjGen(objid, generation));  
2235 -}  
2236 -  
2237 void 1835 void
2238 QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh) 1836 QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh)
2239 { 1837 {
@@ -2273,265 +1871,6 @@ QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement) @@ -2273,265 +1871,6 @@ QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement)
2273 replaceObject(reserved.getObjGen(), replacement); 1871 replaceObject(reserved.getObjGen(), replacement);
2274 } 1872 }
2275 1873
2276 -QPDFObjectHandle  
2277 -QPDF::copyForeignObject(QPDFObjectHandle foreign)  
2278 -{  
2279 - // Here's an explanation of what's going on here.  
2280 - //  
2281 - // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and  
2282 - // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a  
2283 - // foreign QPDF into the local QPDF, we have to replace all indirect object references with  
2284 - // references to the corresponding object in the local file.  
2285 - //  
2286 - // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign  
2287 - // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a  
2288 - // mapping from the foreign ObjGen to the local QPDFObjectHandle.  
2289 - //  
2290 - // To copy, we do a deep traversal of the foreign object with loop detection to discover all  
2291 - // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an  
2292 - // indirect object, we check to see if we have already created a local copy of it. If not, we  
2293 - // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the  
2294 - // mapping from the foreign object ID to the new object. While we  
2295 - // do this, we keep a list of objects to copy.  
2296 - //  
2297 - // Once we are done with the traversal, we copy all the objects that we need to copy. However,  
2298 - // the copies will contain indirect object IDs that refer to objects in the foreign file. We  
2299 - // need to replace them with references to objects in the local file. This is what  
2300 - // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with  
2301 - // all the indirect references replaced with new ones in the local context, we can replace the  
2302 - // local reserved object with the copy. This mechanism allows us to copy objects with circular  
2303 - // references in any order.  
2304 -  
2305 - // For streams, rather than copying the objects, we set up the stream data to pull from the  
2306 - // original stream by using a stream data provider. This is done in a manner that doesn't  
2307 - // require the original QPDF object but may require the original source of the stream data with  
2308 - // special handling for immediate_copy_from. This logic is also in  
2309 - // replaceForeignIndirectObjects.  
2310 -  
2311 - // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented  
2312 - // use case to copy pages this way if the intention is to not update the pages tree.  
2313 - if (!foreign.isIndirect()) {  
2314 - QTC::TC("qpdf", "QPDF copyForeign direct");  
2315 - throw std::logic_error("QPDF::copyForeign called with direct object handle");  
2316 - }  
2317 - QPDF& other = foreign.getQPDF();  
2318 - if (&other == this) {  
2319 - QTC::TC("qpdf", "QPDF copyForeign not foreign");  
2320 - throw std::logic_error("QPDF::copyForeign called with object from this QPDF");  
2321 - }  
2322 -  
2323 - ObjCopier& obj_copier = m->object_copiers[other.m->unique_id];  
2324 - if (!obj_copier.visiting.empty()) {  
2325 - throw std::logic_error(  
2326 - "obj_copier.visiting is not empty at the beginning of copyForeignObject");  
2327 - }  
2328 -  
2329 - // Make sure we have an object in this file for every referenced object in the old file.  
2330 - // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we  
2331 - // have to copy, the local object will be a reservation, unless it is a stream, in which case  
2332 - // the local object will already be a stream.  
2333 - reserveObjects(foreign, obj_copier, true);  
2334 -  
2335 - if (!obj_copier.visiting.empty()) {  
2336 - throw std::logic_error("obj_copier.visiting is not empty after reserving objects");  
2337 - }  
2338 -  
2339 - // Copy any new objects and replace the reservations.  
2340 - for (auto& to_copy: obj_copier.to_copy) {  
2341 - QPDFObjectHandle copy = replaceForeignIndirectObjects(to_copy, obj_copier, true);  
2342 - if (!to_copy.isStream()) {  
2343 - QPDFObjGen og(to_copy.getObjGen());  
2344 - replaceReserved(obj_copier.object_map[og], copy);  
2345 - }  
2346 - }  
2347 - obj_copier.to_copy.clear();  
2348 -  
2349 - auto og = foreign.getObjGen();  
2350 - if (!obj_copier.object_map.count(og)) {  
2351 - warn(damagedPDF(  
2352 - other.getFilename() + " object " + og.unparse(' '),  
2353 - foreign.getParsedOffset(),  
2354 - "unexpected reference to /Pages object while copying foreign object; replacing with "  
2355 - "null"));  
2356 - return QPDFObjectHandle::newNull();  
2357 - }  
2358 - return obj_copier.object_map[foreign.getObjGen()];  
2359 -}  
2360 -  
2361 -void  
2362 -QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)  
2363 -{  
2364 - auto foreign_tc = foreign.getTypeCode();  
2365 - if (foreign_tc == ::ot_reserved) {  
2366 - throw std::logic_error("QPDF: attempting to copy a foreign reserved object");  
2367 - }  
2368 -  
2369 - if (foreign.isPagesObject()) {  
2370 - QTC::TC("qpdf", "QPDF not copying pages object");  
2371 - return;  
2372 - }  
2373 -  
2374 - if (foreign.isIndirect()) {  
2375 - QPDFObjGen foreign_og(foreign.getObjGen());  
2376 - if (!obj_copier.visiting.add(foreign_og)) {  
2377 - QTC::TC("qpdf", "QPDF loop reserving objects");  
2378 - return;  
2379 - }  
2380 - if (obj_copier.object_map.count(foreign_og) > 0) {  
2381 - QTC::TC("qpdf", "QPDF already reserved object");  
2382 - if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) {  
2383 - obj_copier.visiting.erase(foreign);  
2384 - return;  
2385 - }  
2386 - } else {  
2387 - QTC::TC("qpdf", "QPDF copy indirect");  
2388 - obj_copier.object_map[foreign_og] =  
2389 - foreign.isStream() ? newStream() : newIndirectNull();  
2390 - if ((!top) && foreign.isPageObject()) {  
2391 - QTC::TC("qpdf", "QPDF not crossing page boundary");  
2392 - obj_copier.visiting.erase(foreign_og);  
2393 - return;  
2394 - }  
2395 - }  
2396 - obj_copier.to_copy.push_back(foreign);  
2397 - }  
2398 -  
2399 - if (foreign_tc == ::ot_array) {  
2400 - QTC::TC("qpdf", "QPDF reserve array");  
2401 - for (auto const& item: foreign.as_array()) {  
2402 - reserveObjects(item, obj_copier, false);  
2403 - }  
2404 - } else if (foreign_tc == ::ot_dictionary) {  
2405 - QTC::TC("qpdf", "QPDF reserve dictionary");  
2406 - for (auto const& item: foreign.as_dictionary()) {  
2407 - if (!item.second.null()) {  
2408 - reserveObjects(item.second, obj_copier, false);  
2409 - }  
2410 - }  
2411 - } else if (foreign_tc == ::ot_stream) {  
2412 - QTC::TC("qpdf", "QPDF reserve stream");  
2413 - reserveObjects(foreign.getDict(), obj_copier, false);  
2414 - }  
2415 -  
2416 - obj_copier.visiting.erase(foreign);  
2417 -}  
2418 -  
2419 -QPDFObjectHandle  
2420 -QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)  
2421 -{  
2422 - auto foreign_tc = foreign.getTypeCode();  
2423 - QPDFObjectHandle result;  
2424 - if ((!top) && foreign.isIndirect()) {  
2425 - QTC::TC("qpdf", "QPDF replace indirect");  
2426 - auto mapping = obj_copier.object_map.find(foreign.getObjGen());  
2427 - if (mapping == obj_copier.object_map.end()) {  
2428 - // This case would occur if this is a reference to a Pages object that we didn't  
2429 - // traverse into.  
2430 - QTC::TC("qpdf", "QPDF replace foreign indirect with null");  
2431 - result = QPDFObjectHandle::newNull();  
2432 - } else {  
2433 - result = mapping->second;  
2434 - }  
2435 - } else if (foreign_tc == ::ot_array) {  
2436 - QTC::TC("qpdf", "QPDF replace array");  
2437 - result = QPDFObjectHandle::newArray();  
2438 - for (auto const& item: foreign.as_array()) {  
2439 - result.appendItem(replaceForeignIndirectObjects(item, obj_copier, false));  
2440 - }  
2441 - } else if (foreign_tc == ::ot_dictionary) {  
2442 - QTC::TC("qpdf", "QPDF replace dictionary");  
2443 - result = QPDFObjectHandle::newDictionary();  
2444 - for (auto const& [key, value]: foreign.as_dictionary()) {  
2445 - if (!value.null()) {  
2446 - result.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false));  
2447 - }  
2448 - }  
2449 - } else if (foreign_tc == ::ot_stream) {  
2450 - QTC::TC("qpdf", "QPDF replace stream");  
2451 - result = obj_copier.object_map[foreign.getObjGen()];  
2452 - QPDFObjectHandle dict = result.getDict();  
2453 - QPDFObjectHandle old_dict = foreign.getDict();  
2454 - for (auto const& [key, value]: old_dict.as_dictionary()) {  
2455 - if (!value.null()) {  
2456 - dict.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false));  
2457 - }  
2458 - }  
2459 - copyStreamData(result, foreign);  
2460 - } else {  
2461 - foreign.assertScalar();  
2462 - result = foreign;  
2463 - result.makeDirect();  
2464 - }  
2465 -  
2466 - if (top && (!result.isStream()) && result.isIndirect()) {  
2467 - throw std::logic_error("replacement for foreign object is indirect");  
2468 - }  
2469 -  
2470 - return result;  
2471 -}  
2472 -  
2473 -void  
2474 -QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)  
2475 -{  
2476 - // This method was originally written for copying foreign streams, but it is used by  
2477 - // QPDFObjectHandle to copy streams from the same QPDF object as well.  
2478 -  
2479 - QPDFObjectHandle dict = result.getDict();  
2480 - QPDFObjectHandle old_dict = foreign.getDict();  
2481 - if (m->copied_stream_data_provider == nullptr) {  
2482 - m->copied_stream_data_provider = new CopiedStreamDataProvider(*this);  
2483 - m->copied_streams =  
2484 - std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);  
2485 - }  
2486 - QPDFObjGen local_og(result.getObjGen());  
2487 - // Copy information from the foreign stream so we can pipe its data later without keeping the  
2488 - // original QPDF object around.  
2489 -  
2490 - QPDF& foreign_stream_qpdf =  
2491 - foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");  
2492 -  
2493 - auto stream = foreign.as_stream();  
2494 - if (!stream) {  
2495 - throw std::logic_error("unable to retrieve underlying stream object from foreign stream");  
2496 - }  
2497 - std::shared_ptr<Buffer> stream_buffer = stream.getStreamDataBuffer();  
2498 - if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {  
2499 - // Pull the stream data into a buffer before attempting the copy operation. Do it on the  
2500 - // source stream so that if the source stream is copied multiple times, we don't have to  
2501 - // keep duplicating the memory.  
2502 - QTC::TC("qpdf", "QPDF immediate copy stream data");  
2503 - foreign.replaceStreamData(  
2504 - foreign.getRawStreamData(),  
2505 - old_dict.getKey("/Filter"),  
2506 - old_dict.getKey("/DecodeParms"));  
2507 - stream_buffer = stream.getStreamDataBuffer();  
2508 - }  
2509 - std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider =  
2510 - stream.getStreamDataProvider();  
2511 - if (stream_buffer.get()) {  
2512 - QTC::TC("qpdf", "QPDF copy foreign stream with buffer");  
2513 - result.replaceStreamData(  
2514 - stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));  
2515 - } else if (stream_provider.get()) {  
2516 - // In this case, the remote stream's QPDF must stay in scope.  
2517 - QTC::TC("qpdf", "QPDF copy foreign stream with provider");  
2518 - m->copied_stream_data_provider->registerForeignStream(local_og, foreign);  
2519 - result.replaceStreamData(  
2520 - m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));  
2521 - } else {  
2522 - auto foreign_stream_data = std::make_shared<ForeignStreamData>(  
2523 - foreign_stream_qpdf.m->encp,  
2524 - foreign_stream_qpdf.m->file,  
2525 - foreign,  
2526 - foreign.getParsedOffset(),  
2527 - stream.getLength(),  
2528 - dict);  
2529 - m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data);  
2530 - result.replaceStreamData(  
2531 - m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));  
2532 - }  
2533 -}  
2534 -  
2535 void 1874 void
2536 QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2) 1875 QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
2537 { 1876 {
@@ -2547,99 +1886,6 @@ QPDF::swapObjects(QPDFObjGen og1, QPDFObjGen og2) @@ -2547,99 +1886,6 @@ QPDF::swapObjects(QPDFObjGen og1, QPDFObjGen og2)
2547 m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object); 1886 m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object);
2548 } 1887 }
2549 1888
2550 -unsigned long long  
2551 -QPDF::getUniqueId() const  
2552 -{  
2553 - return m->unique_id;  
2554 -}  
2555 -  
2556 -std::string  
2557 -QPDF::getFilename() const  
2558 -{  
2559 - return m->file->getName();  
2560 -}  
2561 -  
2562 -PDFVersion  
2563 -QPDF::getVersionAsPDFVersion()  
2564 -{  
2565 - int major = 1;  
2566 - int minor = 3;  
2567 - int extension_level = getExtensionLevel();  
2568 -  
2569 - std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)");  
2570 - std::smatch match;  
2571 - if (std::regex_search(m->pdf_version, match, v)) {  
2572 - major = QUtil::string_to_int(match[1].str().c_str());  
2573 - minor = QUtil::string_to_int(match[2].str().c_str());  
2574 - }  
2575 -  
2576 - return {major, minor, extension_level};  
2577 -}  
2578 -  
2579 -std::string  
2580 -QPDF::getPDFVersion() const  
2581 -{  
2582 - return m->pdf_version;  
2583 -}  
2584 -  
2585 -int  
2586 -QPDF::getExtensionLevel()  
2587 -{  
2588 - int result = 0;  
2589 - QPDFObjectHandle obj = getRoot();  
2590 - if (obj.hasKey("/Extensions")) {  
2591 - obj = obj.getKey("/Extensions");  
2592 - if (obj.isDictionary() && obj.hasKey("/ADBE")) {  
2593 - obj = obj.getKey("/ADBE");  
2594 - if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) {  
2595 - obj = obj.getKey("/ExtensionLevel");  
2596 - if (obj.isInteger()) {  
2597 - result = obj.getIntValueAsInt();  
2598 - }  
2599 - }  
2600 - }  
2601 - }  
2602 - return result;  
2603 -}  
2604 -  
2605 -QPDFObjectHandle  
2606 -QPDF::getTrailer()  
2607 -{  
2608 - return m->trailer;  
2609 -}  
2610 -  
2611 -QPDFObjectHandle  
2612 -QPDF::getRoot()  
2613 -{  
2614 - QPDFObjectHandle root = m->trailer.getKey("/Root");  
2615 - if (!root.isDictionary()) {  
2616 - throw damagedPDF("", 0, "unable to find /Root dictionary");  
2617 - } else if (  
2618 - // Check_mode is an interim solution to request #810 pending a more comprehensive review of  
2619 - // the approach to more extensive checks and warning levels.  
2620 - m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {  
2621 - warn(damagedPDF("", 0, "catalog /Type entry missing or invalid"));  
2622 - root.replaceKey("/Type", "/Catalog"_qpdf);  
2623 - }  
2624 - return root;  
2625 -}  
2626 -  
2627 -std::map<QPDFObjGen, QPDFXRefEntry>  
2628 -QPDF::getXRefTable()  
2629 -{  
2630 - return getXRefTableInternal();  
2631 -}  
2632 -  
2633 -std::map<QPDFObjGen, QPDFXRefEntry> const&  
2634 -QPDF::getXRefTableInternal()  
2635 -{  
2636 - if (!m->parsed) {  
2637 - throw std::logic_error("QPDF::getXRefTable called before parsing.");  
2638 - }  
2639 -  
2640 - return m->xref_table;  
2641 -}  
2642 -  
2643 size_t 1889 size_t
2644 QPDF::tableSize() 1890 QPDF::tableSize()
2645 { 1891 {
@@ -2769,192 +2015,3 @@ QPDF::getCompressibleObjGens() @@ -2769,192 +2015,3 @@ QPDF::getCompressibleObjGens()
2769 2015
2770 return result; 2016 return result;
2771 } 2017 }
2772 -  
2773 -bool  
2774 -QPDF::pipeStreamData(  
2775 - std::shared_ptr<EncryptionParameters> encp,  
2776 - std::shared_ptr<InputSource> file,  
2777 - QPDF& qpdf_for_warning,  
2778 - QPDFObjGen og,  
2779 - qpdf_offset_t offset,  
2780 - size_t length,  
2781 - QPDFObjectHandle stream_dict,  
2782 - Pipeline* pipeline,  
2783 - bool suppress_warnings,  
2784 - bool will_retry)  
2785 -{  
2786 - std::unique_ptr<Pipeline> to_delete;  
2787 - if (encp->encrypted) {  
2788 - decryptStream(encp, file, qpdf_for_warning, pipeline, og, stream_dict, to_delete);  
2789 - }  
2790 -  
2791 - bool attempted_finish = false;  
2792 - try {  
2793 - auto buf = file->read(length, offset);  
2794 - if (buf.size() != length) {  
2795 - throw damagedPDF(  
2796 - *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data");  
2797 - }  
2798 - pipeline->write(buf.data(), length);  
2799 - attempted_finish = true;  
2800 - pipeline->finish();  
2801 - return true;  
2802 - } catch (QPDFExc& e) {  
2803 - if (!suppress_warnings) {  
2804 - qpdf_for_warning.warn(e);  
2805 - }  
2806 - } catch (std::exception& e) {  
2807 - if (!suppress_warnings) {  
2808 - QTC::TC("qpdf", "QPDF decoding error warning");  
2809 - qpdf_for_warning.warn(  
2810 - // line-break  
2811 - damagedPDF(  
2812 - *file,  
2813 - "",  
2814 - file->getLastOffset(),  
2815 - ("error decoding stream data for object " + og.unparse(' ') + ": " +  
2816 - e.what())));  
2817 - if (will_retry) {  
2818 - qpdf_for_warning.warn(  
2819 - // line-break  
2820 - damagedPDF(  
2821 - *file,  
2822 - "",  
2823 - file->getLastOffset(),  
2824 - "stream will be re-processed without filtering to avoid data loss"));  
2825 - }  
2826 - }  
2827 - }  
2828 - if (!attempted_finish) {  
2829 - try {  
2830 - pipeline->finish();  
2831 - } catch (std::exception&) {  
2832 - // ignore  
2833 - }  
2834 - }  
2835 - return false;  
2836 -}  
2837 -  
2838 -bool  
2839 -QPDF::pipeStreamData(  
2840 - QPDFObjGen og,  
2841 - qpdf_offset_t offset,  
2842 - size_t length,  
2843 - QPDFObjectHandle stream_dict,  
2844 - Pipeline* pipeline,  
2845 - bool suppress_warnings,  
2846 - bool will_retry)  
2847 -{  
2848 - return pipeStreamData(  
2849 - m->encp,  
2850 - m->file,  
2851 - *this,  
2852 - og,  
2853 - offset,  
2854 - length,  
2855 - stream_dict,  
2856 - pipeline,  
2857 - suppress_warnings,  
2858 - will_retry);  
2859 -}  
2860 -  
2861 -bool  
2862 -QPDF::pipeForeignStreamData(  
2863 - std::shared_ptr<ForeignStreamData> foreign,  
2864 - Pipeline* pipeline,  
2865 - bool suppress_warnings,  
2866 - bool will_retry)  
2867 -{  
2868 - if (foreign->encp->encrypted) {  
2869 - QTC::TC("qpdf", "QPDF pipe foreign encrypted stream");  
2870 - }  
2871 - return pipeStreamData(  
2872 - foreign->encp,  
2873 - foreign->file,  
2874 - *this,  
2875 - foreign->foreign_og,  
2876 - foreign->offset,  
2877 - foreign->length,  
2878 - foreign->local_dict,  
2879 - pipeline,  
2880 - suppress_warnings,  
2881 - will_retry);  
2882 -}  
2883 -  
2884 -// Throw a generic exception when we lack context for something more specific. New code should not  
2885 -// use this. This method exists to improve somewhat from calling assert in very old code.  
2886 -void  
2887 -QPDF::stopOnError(std::string const& message)  
2888 -{  
2889 - throw damagedPDF("", message);  
2890 -}  
2891 -  
2892 -// Return an exception of type qpdf_e_damaged_pdf.  
2893 -QPDFExc  
2894 -QPDF::damagedPDF(  
2895 - InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message)  
2896 -{  
2897 - return {qpdf_e_damaged_pdf, input.getName(), object, offset, message};  
2898 -}  
2899 -  
2900 -// Return an exception of type qpdf_e_damaged_pdf. The object is taken from  
2901 -// m->last_object_description.  
2902 -QPDFExc  
2903 -QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message)  
2904 -{  
2905 - return damagedPDF(input, m->last_object_description, offset, message);  
2906 -}  
2907 -  
2908 -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file.  
2909 -QPDFExc  
2910 -QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)  
2911 -{  
2912 - return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message};  
2913 -}  
2914 -  
2915 -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the  
2916 -// offset from .m->file->getLastOffset().  
2917 -QPDFExc  
2918 -QPDF::damagedPDF(std::string const& object, std::string const& message)  
2919 -{  
2920 - return damagedPDF(object, m->file->getLastOffset(), message);  
2921 -}  
2922 -  
2923 -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object  
2924 -// from .m->last_object_description.  
2925 -QPDFExc  
2926 -QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)  
2927 -{  
2928 - return damagedPDF(m->last_object_description, offset, message);  
2929 -}  
2930 -  
2931 -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object  
2932 -// from m->last_object_description and the offset from m->file->getLastOffset().  
2933 -QPDFExc  
2934 -QPDF::damagedPDF(std::string const& message)  
2935 -{  
2936 - return damagedPDF(m->last_object_description, m->file->getLastOffset(), message);  
2937 -}  
2938 -  
2939 -bool  
2940 -QPDF::everCalledGetAllPages() const  
2941 -{  
2942 - return m->ever_called_get_all_pages;  
2943 -}  
2944 -  
2945 -bool  
2946 -QPDF::everPushedInheritedAttributesToPages() const  
2947 -{  
2948 - return m->ever_pushed_inherited_attributes_to_pages;  
2949 -}  
2950 -  
2951 -void  
2952 -QPDF::removeSecurityRestrictions()  
2953 -{  
2954 - auto root = getRoot();  
2955 - root.removeKey("/Perms");  
2956 - auto acroform = root.getKey("/AcroForm");  
2957 - if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) {  
2958 - acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0));  
2959 - }  
2960 -}  
manual/release-notes.rst
@@ -21,16 +21,15 @@ more detail. @@ -21,16 +21,15 @@ more detail.
21 integer object. Previously the method returned false if the first 21 integer object. Previously the method returned false if the first
22 dictionary object was not a linearization parameter dictionary. 22 dictionary object was not a linearization parameter dictionary.
23 23
24 -.. _r12-0-0:  
25 -  
26 -12.0.1: not yet released  
27 - - Other enhancements 24 + - Other enhancements
28 25
29 - - There have been further enhancements to how files with damaged xref  
30 - tables are recovered. 26 + - There have been further enhancements to how files with damaged xref
  27 + tables are recovered.
31 28
32 .. cSpell:ignore substract 29 .. cSpell:ignore substract
33 30
  31 +.. _r12-0-0:
  32 +
34 12.0.0: March 9, 2025 33 12.0.0: March 9, 2025
35 - API breaking changes 34 - API breaking changes
36 35