Commit a51a139615ef59e79d586f21bae0098d6025e7a4

Authored by m-holger
1 parent 2c211a51

Split QPDF.cc into QPDF.cc and QPDF_objects.cc

Move methods responsible for loading or keeping track of objects to
QPDF_objects.cc.

The split was part of the reverted #1297. Reintroducing it now makes it
easier/safer to keep the work to refactor the xref and object tables in
sync with main.
libqpdf/CMakeLists.txt
... ... @@ -54,6 +54,7 @@ set(libqpdf_SOURCES
54 54 Pl_StdioFile.cc
55 55 Pl_String.cc
56 56 Pl_TIFFPredictor.cc
  57 + QPDF.cc
57 58 QPDFAcroFormDocumentHelper.cc
58 59 QPDFAnnotationObjectHelper.cc
59 60 QPDFArgParser.cc
... ...
libqpdf/QPDF.cc 0 → 100644
  1 +#include <qpdf/qpdf-config.h> // include first for large file support
  2 +
  3 +#include <qpdf/QPDF_private.hh>
  4 +
#include <array>
#include <atomic>
#include <cstring>
#include <limits>
#include <map>
#include <memory>
#include <regex>
#include <sstream>
#include <utility>
#include <vector>
  13 +
  14 +#include <qpdf/BufferInputSource.hh>
  15 +#include <qpdf/FileInputSource.hh>
  16 +#include <qpdf/InputSource_private.hh>
  17 +#include <qpdf/OffsetInputSource.hh>
  18 +#include <qpdf/Pipeline.hh>
  19 +#include <qpdf/QPDFExc.hh>
  20 +#include <qpdf/QPDFLogger.hh>
  21 +#include <qpdf/QPDFObjectHandle_private.hh>
  22 +#include <qpdf/QPDFObject_private.hh>
  23 +#include <qpdf/QPDFParser.hh>
  24 +#include <qpdf/QTC.hh>
  25 +#include <qpdf/QUtil.hh>
  26 +#include <qpdf/Util.hh>
  27 +
  28 +using namespace qpdf;
  29 +using namespace std::literals;
  30 +
  31 +// This must be a fixed value. This API returns a const reference to it, and the C API relies on its
  32 +// being static as well.
  33 +std::string const QPDF::qpdf_version(QPDF_VERSION);
  34 +
// A minimal, self-contained PDF: a catalog, an empty page tree, a classic xref table and a
// trailer. The offsets in the xref table and after startxref are byte offsets into this exact
// text, so any change to the text must be matched by an update to those numbers. Both the
// characters and the pointer are const so the constant cannot be reseated by accident.
static char const* const EMPTY_PDF = (
    // force line break
    "%PDF-1.3\n"
    "1 0 obj\n"
    "<< /Type /Catalog /Pages 2 0 R >>\n"
    "endobj\n"
    "2 0 obj\n"
    "<< /Type /Pages /Kids [] /Count 0 >>\n"
    "endobj\n"
    "xref\n"
    "0 3\n"
    "0000000000 65535 f \n"
    "0000000009 00000 n \n"
    "0000000058 00000 n \n"
    "trailer << /Size 3 /Root 1 0 R >>\n"
    "startxref\n"
    "110\n"
    "%%EOF\n");
  53 +
  54 +namespace
  55 +{
  56 + class InvalidInputSource: public InputSource
  57 + {
  58 + public:
  59 + ~InvalidInputSource() override = default;
  60 + qpdf_offset_t
  61 + findAndSkipNextEOL() override
  62 + {
  63 + throwException();
  64 + return 0;
  65 + }
  66 + std::string const&
  67 + getName() const override
  68 + {
  69 + static std::string name("closed input source");
  70 + return name;
  71 + }
  72 + qpdf_offset_t
  73 + tell() override
  74 + {
  75 + throwException();
  76 + return 0;
  77 + }
  78 + void
  79 + seek(qpdf_offset_t offset, int whence) override
  80 + {
  81 + throwException();
  82 + }
  83 + void
  84 + rewind() override
  85 + {
  86 + throwException();
  87 + }
  88 + size_t
  89 + read(char* buffer, size_t length) override
  90 + {
  91 + throwException();
  92 + return 0;
  93 + }
  94 + void
  95 + unreadCh(char ch) override
  96 + {
  97 + throwException();
  98 + }
  99 +
  100 + private:
  101 + void
  102 + throwException()
  103 + {
  104 + throw std::logic_error(
  105 + "QPDF operation attempted on a QPDF object with no input "
  106 + "source. QPDF operations are invalid before processFile (or "
  107 + "another process method) or after closeInputSource");
  108 + }
  109 + };
  110 +} // namespace
  111 +
  112 +QPDF::ForeignStreamData::ForeignStreamData(
  113 + std::shared_ptr<EncryptionParameters> encp,
  114 + std::shared_ptr<InputSource> file,
  115 + QPDFObjGen foreign_og,
  116 + qpdf_offset_t offset,
  117 + size_t length,
  118 + QPDFObjectHandle local_dict) :
  119 + encp(encp),
  120 + file(file),
  121 + foreign_og(foreign_og),
  122 + offset(offset),
  123 + length(length),
  124 + local_dict(local_dict)
  125 +{
  126 +}
  127 +
  128 +QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) :
  129 + QPDFObjectHandle::StreamDataProvider(true),
  130 + destination_qpdf(destination_qpdf)
  131 +{
  132 +}
  133 +
  134 +bool
  135 +QPDF::CopiedStreamDataProvider::provideStreamData(
  136 + QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry)
  137 +{
  138 + std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og];
  139 + bool result = false;
  140 + if (foreign_data.get()) {
  141 + result = destination_qpdf.pipeForeignStreamData(
  142 + foreign_data, pipeline, suppress_warnings, will_retry);
  143 + QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1);
  144 + } else {
  145 + auto foreign_stream = foreign_streams[og];
  146 + result = foreign_stream.pipeStreamData(
  147 + pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry);
  148 + QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1);
  149 + }
  150 + return result;
  151 +}
  152 +
  153 +void
  154 +QPDF::CopiedStreamDataProvider::registerForeignStream(
  155 + QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream)
  156 +{
  157 + this->foreign_streams[local_og] = foreign_stream;
  158 +}
  159 +
  160 +void
  161 +QPDF::CopiedStreamDataProvider::registerForeignStream(
  162 + QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream)
  163 +{
  164 + this->foreign_stream_data[local_og] = foreign_stream;
  165 +}
  166 +
  167 +QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) :
  168 + qpdf(qpdf),
  169 + og(og)
  170 +{
  171 +}
  172 +
  173 +std::string const&
  174 +QPDF::QPDFVersion()
  175 +{
  176 + // The C API relies on this being a static value.
  177 + return QPDF::qpdf_version;
  178 +}
  179 +
  180 +QPDF::Members::Members() :
  181 + log(QPDFLogger::defaultLogger()),
  182 + file(new InvalidInputSource()),
  183 + encp(new EncryptionParameters)
  184 +{
  185 +}
  186 +
  187 +QPDF::QPDF() :
  188 + m(std::make_unique<Members>())
  189 +{
  190 + m->tokenizer.allowEOF();
  191 + // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout
  192 + // the lifetime of this running application.
  193 + static std::atomic<unsigned long long> unique_id{0};
  194 + m->unique_id = unique_id.fetch_add(1ULL);
  195 +}
  196 +
  197 +// Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache
  198 +// (future Objects::Entry) to centralize all QPDF access to QPDFObject.
  199 +class Disconnect: BaseHandle
  200 +{
  201 + public:
  202 + Disconnect(std::shared_ptr<QPDFObject> const& obj) :
  203 + BaseHandle(obj)
  204 + {
  205 + }
  206 + void
  207 + disconnect()
  208 + {
  209 + BaseHandle::disconnect(false);
  210 + if (raw_type_code() != ::ot_null) {
  211 + obj->value = QPDF_Destroyed();
  212 + }
  213 + }
  214 +};
  215 +
  216 +QPDF::~QPDF()
  217 +{
  218 + // If two objects are mutually referential (through each object having an array or dictionary
  219 + // that contains an indirect reference to the other), the circular references in the
  220 + // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects
  221 + // in the object cache, which is those objects that we read from the file, and break all
  222 + // resolved indirect references by replacing them with an internal object type representing that
  223 + // they have been destroyed. Note that we can't break references like this at any time when the
  224 + // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that
  225 + // are reachable from this object to release their association with this QPDF. Direct objects
  226 + // are not destroyed since they can be moved to other QPDF objects safely.
  227 +
  228 + // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear
  229 + // the xref table anyway just to prevent any possibility of resolve() succeeding.
  230 + m->xref_table.clear();
  231 + for (auto const& iter: m->obj_cache) {
  232 + Disconnect(iter.second.object).disconnect();
  233 + }
  234 +}
  235 +
  236 +std::shared_ptr<QPDF>
  237 +QPDF::create()
  238 +{
  239 + return std::make_shared<QPDF>();
  240 +}
  241 +
  242 +void
  243 +QPDF::processFile(char const* filename, char const* password)
  244 +{
  245 + auto* fi = new FileInputSource(filename);
  246 + processInputSource(std::shared_ptr<InputSource>(fi), password);
  247 +}
  248 +
  249 +void
  250 +QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password)
  251 +{
  252 + auto* fi = new FileInputSource(description, filep, close_file);
  253 + processInputSource(std::shared_ptr<InputSource>(fi), password);
  254 +}
  255 +
  256 +void
  257 +QPDF::processMemoryFile(
  258 + char const* description, char const* buf, size_t length, char const* password)
  259 +{
  260 + processInputSource(
  261 + std::shared_ptr<InputSource>(
  262 + // line-break
  263 + new BufferInputSource(
  264 + description, new Buffer(QUtil::unsigned_char_pointer(buf), length), true)),
  265 + password);
  266 +}
  267 +
  268 +void
  269 +QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password)
  270 +{
  271 + m->file = source;
  272 + parse(password);
  273 +}
  274 +
  275 +void
  276 +QPDF::closeInputSource()
  277 +{
  278 + m->file = std::shared_ptr<InputSource>(new InvalidInputSource());
  279 +}
  280 +
  281 +void
  282 +QPDF::setPasswordIsHexKey(bool val)
  283 +{
  284 + m->provided_password_is_hex_key = val;
  285 +}
  286 +
  287 +void
  288 +QPDF::emptyPDF()
  289 +{
  290 + processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF));
  291 +}
  292 +
  293 +void
  294 +QPDF::registerStreamFilter(
  295 + std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
  296 +{
  297 + qpdf::Stream::registerStreamFilter(filter_name, factory);
  298 +}
  299 +
  300 +void
  301 +QPDF::setIgnoreXRefStreams(bool val)
  302 +{
  303 + m->ignore_xref_streams = val;
  304 +}
  305 +
  306 +std::shared_ptr<QPDFLogger>
  307 +QPDF::getLogger()
  308 +{
  309 + return m->log;
  310 +}
  311 +
  312 +void
  313 +QPDF::setLogger(std::shared_ptr<QPDFLogger> l)
  314 +{
  315 + m->log = l;
  316 +}
  317 +
  318 +void
  319 +QPDF::setOutputStreams(std::ostream* out, std::ostream* err)
  320 +{
  321 + setLogger(QPDFLogger::create());
  322 + m->log->setOutputStreams(out, err);
  323 +}
  324 +
  325 +void
  326 +QPDF::setSuppressWarnings(bool val)
  327 +{
  328 + m->suppress_warnings = val;
  329 +}
  330 +
  331 +void
  332 +QPDF::setMaxWarnings(size_t val)
  333 +{
  334 + m->max_warnings = val;
  335 +}
  336 +
  337 +void
  338 +QPDF::setAttemptRecovery(bool val)
  339 +{
  340 + m->attempt_recovery = val;
  341 +}
  342 +
  343 +void
  344 +QPDF::setImmediateCopyFrom(bool val)
  345 +{
  346 + m->immediate_copy_from = val;
  347 +}
  348 +
  349 +std::vector<QPDFExc>
  350 +QPDF::getWarnings()
  351 +{
  352 + std::vector<QPDFExc> result = m->warnings;
  353 + m->warnings.clear();
  354 + return result;
  355 +}
  356 +
  357 +bool
  358 +QPDF::anyWarnings() const
  359 +{
  360 + return !m->warnings.empty();
  361 +}
  362 +
  363 +size_t
  364 +QPDF::numWarnings() const
  365 +{
  366 + return m->warnings.size();
  367 +}
  368 +
  369 +bool
  370 +QPDF::validatePDFVersion(char const*& p, std::string& version)
  371 +{
  372 + bool valid = util::is_digit(*p);
  373 + if (valid) {
  374 + while (util::is_digit(*p)) {
  375 + version.append(1, *p++);
  376 + }
  377 + if ((*p == '.') && util::is_digit(*(p + 1))) {
  378 + version.append(1, *p++);
  379 + while (util::is_digit(*p)) {
  380 + version.append(1, *p++);
  381 + }
  382 + } else {
  383 + valid = false;
  384 + }
  385 + }
  386 + return valid;
  387 +}
  388 +
  389 +bool
  390 +QPDF::findHeader()
  391 +{
  392 + qpdf_offset_t global_offset = m->file->tell();
  393 + std::string line = m->file->readLine(1024);
  394 + char const* p = line.c_str();
  395 + if (strncmp(p, "%PDF-", 5) != 0) {
  396 + throw std::logic_error("findHeader is not looking at %PDF-");
  397 + }
  398 + p += 5;
  399 + std::string version;
  400 + // Note: The string returned by line.c_str() is always null-terminated. The code below never
  401 + // overruns the buffer because a null character always short-circuits further advancement.
  402 + bool valid = validatePDFVersion(p, version);
  403 + if (valid) {
  404 + m->pdf_version = version;
  405 + if (global_offset != 0) {
  406 + // Empirical evidence strongly suggests that when there is leading material prior to the
  407 + // PDF header, all explicit offsets in the file are such that 0 points to the beginning
  408 + // of the header.
  409 + QTC::TC("qpdf", "QPDF global offset");
  410 + m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset));
  411 + }
  412 + }
  413 + return valid;
  414 +}
  415 +
  416 +void
  417 +QPDF::warn(QPDFExc const& e)
  418 +{
  419 + if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) {
  420 + stopOnError("Too many warnings - file is too badly damaged");
  421 + }
  422 + m->warnings.push_back(e);
  423 + if (!m->suppress_warnings) {
  424 + *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";
  425 + }
  426 +}
  427 +
  428 +void
  429 +QPDF::warn(
  430 + qpdf_error_code_e error_code,
  431 + std::string const& object,
  432 + qpdf_offset_t offset,
  433 + std::string const& message)
  434 +{
  435 + warn(QPDFExc(error_code, getFilename(), object, offset, message));
  436 +}
  437 +
  438 +QPDFObjectHandle
  439 +QPDF::newReserved()
  440 +{
  441 + return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>());
  442 +}
  443 +
  444 +QPDFObjectHandle
  445 +QPDF::newIndirectNull()
  446 +{
  447 + return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>());
  448 +}
  449 +
  450 +QPDFObjectHandle
  451 +QPDF::newStream()
  452 +{
  453 + return makeIndirectObject(
  454 + qpdf::Stream(*this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0));
  455 +}
  456 +
  457 +QPDFObjectHandle
  458 +QPDF::newStream(std::shared_ptr<Buffer> data)
  459 +{
  460 + auto result = newStream();
  461 + result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
  462 + return result;
  463 +}
  464 +
  465 +QPDFObjectHandle
  466 +QPDF::newStream(std::string const& data)
  467 +{
  468 + auto result = newStream();
  469 + result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
  470 + return result;
  471 +}
  472 +
  473 +QPDFObjectHandle
  474 +QPDF::getObject(int objid, int generation)
  475 +{
  476 + return getObject(QPDFObjGen(objid, generation));
  477 +}
  478 +
  479 +QPDFObjectHandle
  480 +QPDF::getObjectByObjGen(QPDFObjGen og)
  481 +{
  482 + return getObject(og);
  483 +}
  484 +
  485 +QPDFObjectHandle
  486 +QPDF::getObjectByID(int objid, int generation)
  487 +{
  488 + return getObject(QPDFObjGen(objid, generation));
  489 +}
  490 +
  491 +QPDFObjectHandle
  492 +QPDF::copyForeignObject(QPDFObjectHandle foreign)
  493 +{
  494 + // Here's an explanation of what's going on here.
  495 + //
  496 + // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and
  497 + // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a
  498 + // foreign QPDF into the local QPDF, we have to replace all indirect object references with
  499 + // references to the corresponding object in the local file.
  500 + //
  501 + // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign
  502 + // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a
  503 + // mapping from the foreign ObjGen to the local QPDFObjectHandle.
  504 + //
  505 + // To copy, we do a deep traversal of the foreign object with loop detection to discover all
  506 + // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an
  507 + // indirect object, we check to see if we have already created a local copy of it. If not, we
  508 + // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the
  509 + // mapping from the foreign object ID to the new object. While we
  510 + // do this, we keep a list of objects to copy.
  511 + //
  512 + // Once we are done with the traversal, we copy all the objects that we need to copy. However,
  513 + // the copies will contain indirect object IDs that refer to objects in the foreign file. We
  514 + // need to replace them with references to objects in the local file. This is what
  515 + // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with
  516 + // all the indirect references replaced with new ones in the local context, we can replace the
  517 + // local reserved object with the copy. This mechanism allows us to copy objects with circular
  518 + // references in any order.
  519 +
  520 + // For streams, rather than copying the objects, we set up the stream data to pull from the
  521 + // original stream by using a stream data provider. This is done in a manner that doesn't
  522 + // require the original QPDF object but may require the original source of the stream data with
  523 + // special handling for immediate_copy_from. This logic is also in
  524 + // replaceForeignIndirectObjects.
  525 +
  526 + // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented
  527 + // use case to copy pages this way if the intention is to not update the pages tree.
  528 + if (!foreign.isIndirect()) {
  529 + QTC::TC("qpdf", "QPDF copyForeign direct");
  530 + throw std::logic_error("QPDF::copyForeign called with direct object handle");
  531 + }
  532 + QPDF& other = foreign.getQPDF();
  533 + if (&other == this) {
  534 + QTC::TC("qpdf", "QPDF copyForeign not foreign");
  535 + throw std::logic_error("QPDF::copyForeign called with object from this QPDF");
  536 + }
  537 +
  538 + ObjCopier& obj_copier = m->object_copiers[other.m->unique_id];
  539 + if (!obj_copier.visiting.empty()) {
  540 + throw std::logic_error(
  541 + "obj_copier.visiting is not empty at the beginning of copyForeignObject");
  542 + }
  543 +
  544 + // Make sure we have an object in this file for every referenced object in the old file.
  545 + // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we
  546 + // have to copy, the local object will be a reservation, unless it is a stream, in which case
  547 + // the local object will already be a stream.
  548 + reserveObjects(foreign, obj_copier, true);
  549 +
  550 + if (!obj_copier.visiting.empty()) {
  551 + throw std::logic_error("obj_copier.visiting is not empty after reserving objects");
  552 + }
  553 +
  554 + // Copy any new objects and replace the reservations.
  555 + for (auto& to_copy: obj_copier.to_copy) {
  556 + QPDFObjectHandle copy = replaceForeignIndirectObjects(to_copy, obj_copier, true);
  557 + if (!to_copy.isStream()) {
  558 + QPDFObjGen og(to_copy.getObjGen());
  559 + replaceReserved(obj_copier.object_map[og], copy);
  560 + }
  561 + }
  562 + obj_copier.to_copy.clear();
  563 +
  564 + auto og = foreign.getObjGen();
  565 + if (!obj_copier.object_map.count(og)) {
  566 + warn(damagedPDF(
  567 + other.getFilename() + " object " + og.unparse(' '),
  568 + foreign.getParsedOffset(),
  569 + "unexpected reference to /Pages object while copying foreign object; replacing with "
  570 + "null"));
  571 + return QPDFObjectHandle::newNull();
  572 + }
  573 + return obj_copier.object_map[foreign.getObjGen()];
  574 +}
  575 +
  576 +void
  577 +QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
  578 +{
  579 + auto foreign_tc = foreign.getTypeCode();
  580 + if (foreign_tc == ::ot_reserved) {
  581 + throw std::logic_error("QPDF: attempting to copy a foreign reserved object");
  582 + }
  583 +
  584 + if (foreign.isPagesObject()) {
  585 + QTC::TC("qpdf", "QPDF not copying pages object");
  586 + return;
  587 + }
  588 +
  589 + if (foreign.isIndirect()) {
  590 + QPDFObjGen foreign_og(foreign.getObjGen());
  591 + if (!obj_copier.visiting.add(foreign_og)) {
  592 + QTC::TC("qpdf", "QPDF loop reserving objects");
  593 + return;
  594 + }
  595 + if (obj_copier.object_map.count(foreign_og) > 0) {
  596 + QTC::TC("qpdf", "QPDF already reserved object");
  597 + if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) {
  598 + obj_copier.visiting.erase(foreign);
  599 + return;
  600 + }
  601 + } else {
  602 + QTC::TC("qpdf", "QPDF copy indirect");
  603 + obj_copier.object_map[foreign_og] =
  604 + foreign.isStream() ? newStream() : newIndirectNull();
  605 + if ((!top) && foreign.isPageObject()) {
  606 + QTC::TC("qpdf", "QPDF not crossing page boundary");
  607 + obj_copier.visiting.erase(foreign_og);
  608 + return;
  609 + }
  610 + }
  611 + obj_copier.to_copy.push_back(foreign);
  612 + }
  613 +
  614 + if (foreign_tc == ::ot_array) {
  615 + QTC::TC("qpdf", "QPDF reserve array");
  616 + for (auto const& item: foreign.as_array()) {
  617 + reserveObjects(item, obj_copier, false);
  618 + }
  619 + } else if (foreign_tc == ::ot_dictionary) {
  620 + QTC::TC("qpdf", "QPDF reserve dictionary");
  621 + for (auto const& item: foreign.as_dictionary()) {
  622 + if (!item.second.null()) {
  623 + reserveObjects(item.second, obj_copier, false);
  624 + }
  625 + }
  626 + } else if (foreign_tc == ::ot_stream) {
  627 + QTC::TC("qpdf", "QPDF reserve stream");
  628 + reserveObjects(foreign.getDict(), obj_copier, false);
  629 + }
  630 +
  631 + obj_copier.visiting.erase(foreign);
  632 +}
  633 +
  634 +QPDFObjectHandle
  635 +QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
  636 +{
  637 + auto foreign_tc = foreign.getTypeCode();
  638 + QPDFObjectHandle result;
  639 + if ((!top) && foreign.isIndirect()) {
  640 + QTC::TC("qpdf", "QPDF replace indirect");
  641 + auto mapping = obj_copier.object_map.find(foreign.getObjGen());
  642 + if (mapping == obj_copier.object_map.end()) {
  643 + // This case would occur if this is a reference to a Pages object that we didn't
  644 + // traverse into.
  645 + QTC::TC("qpdf", "QPDF replace foreign indirect with null");
  646 + result = QPDFObjectHandle::newNull();
  647 + } else {
  648 + result = mapping->second;
  649 + }
  650 + } else if (foreign_tc == ::ot_array) {
  651 + QTC::TC("qpdf", "QPDF replace array");
  652 + result = QPDFObjectHandle::newArray();
  653 + for (auto const& item: foreign.as_array()) {
  654 + result.appendItem(replaceForeignIndirectObjects(item, obj_copier, false));
  655 + }
  656 + } else if (foreign_tc == ::ot_dictionary) {
  657 + QTC::TC("qpdf", "QPDF replace dictionary");
  658 + result = QPDFObjectHandle::newDictionary();
  659 + for (auto const& [key, value]: foreign.as_dictionary()) {
  660 + if (!value.null()) {
  661 + result.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false));
  662 + }
  663 + }
  664 + } else if (foreign_tc == ::ot_stream) {
  665 + QTC::TC("qpdf", "QPDF replace stream");
  666 + result = obj_copier.object_map[foreign.getObjGen()];
  667 + QPDFObjectHandle dict = result.getDict();
  668 + QPDFObjectHandle old_dict = foreign.getDict();
  669 + for (auto const& [key, value]: old_dict.as_dictionary()) {
  670 + if (!value.null()) {
  671 + dict.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false));
  672 + }
  673 + }
  674 + copyStreamData(result, foreign);
  675 + } else {
  676 + foreign.assertScalar();
  677 + result = foreign;
  678 + result.makeDirect();
  679 + }
  680 +
  681 + if (top && (!result.isStream()) && result.isIndirect()) {
  682 + throw std::logic_error("replacement for foreign object is indirect");
  683 + }
  684 +
  685 + return result;
  686 +}
  687 +
  688 +void
  689 +QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
  690 +{
  691 + // This method was originally written for copying foreign streams, but it is used by
  692 + // QPDFObjectHandle to copy streams from the same QPDF object as well.
  693 +
  694 + QPDFObjectHandle dict = result.getDict();
  695 + QPDFObjectHandle old_dict = foreign.getDict();
  696 + if (m->copied_stream_data_provider == nullptr) {
  697 + m->copied_stream_data_provider = new CopiedStreamDataProvider(*this);
  698 + m->copied_streams =
  699 + std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);
  700 + }
  701 + QPDFObjGen local_og(result.getObjGen());
  702 + // Copy information from the foreign stream so we can pipe its data later without keeping the
  703 + // original QPDF object around.
  704 +
  705 + QPDF& foreign_stream_qpdf =
  706 + foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");
  707 +
  708 + auto stream = foreign.as_stream();
  709 + if (!stream) {
  710 + throw std::logic_error("unable to retrieve underlying stream object from foreign stream");
  711 + }
  712 + std::shared_ptr<Buffer> stream_buffer = stream.getStreamDataBuffer();
  713 + if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {
  714 + // Pull the stream data into a buffer before attempting the copy operation. Do it on the
  715 + // source stream so that if the source stream is copied multiple times, we don't have to
  716 + // keep duplicating the memory.
  717 + QTC::TC("qpdf", "QPDF immediate copy stream data");
  718 + foreign.replaceStreamData(
  719 + foreign.getRawStreamData(),
  720 + old_dict.getKey("/Filter"),
  721 + old_dict.getKey("/DecodeParms"));
  722 + stream_buffer = stream.getStreamDataBuffer();
  723 + }
  724 + std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider =
  725 + stream.getStreamDataProvider();
  726 + if (stream_buffer.get()) {
  727 + QTC::TC("qpdf", "QPDF copy foreign stream with buffer");
  728 + result.replaceStreamData(
  729 + stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
  730 + } else if (stream_provider.get()) {
  731 + // In this case, the remote stream's QPDF must stay in scope.
  732 + QTC::TC("qpdf", "QPDF copy foreign stream with provider");
  733 + m->copied_stream_data_provider->registerForeignStream(local_og, foreign);
  734 + result.replaceStreamData(
  735 + m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
  736 + } else {
  737 + auto foreign_stream_data = std::make_shared<ForeignStreamData>(
  738 + foreign_stream_qpdf.m->encp,
  739 + foreign_stream_qpdf.m->file,
  740 + foreign,
  741 + foreign.getParsedOffset(),
  742 + stream.getLength(),
  743 + dict);
  744 + m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data);
  745 + result.replaceStreamData(
  746 + m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
  747 + }
  748 +}
  749 +
  750 +unsigned long long
  751 +QPDF::getUniqueId() const
  752 +{
  753 + return m->unique_id;
  754 +}
  755 +
  756 +std::string
  757 +QPDF::getFilename() const
  758 +{
  759 + return m->file->getName();
  760 +}
  761 +
  762 +PDFVersion
  763 +QPDF::getVersionAsPDFVersion()
  764 +{
  765 + int major = 1;
  766 + int minor = 3;
  767 + int extension_level = getExtensionLevel();
  768 +
  769 + std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)");
  770 + std::smatch match;
  771 + if (std::regex_search(m->pdf_version, match, v)) {
  772 + major = QUtil::string_to_int(match[1].str().c_str());
  773 + minor = QUtil::string_to_int(match[2].str().c_str());
  774 + }
  775 +
  776 + return {major, minor, extension_level};
  777 +}
  778 +
  779 +std::string
  780 +QPDF::getPDFVersion() const
  781 +{
  782 + return m->pdf_version;
  783 +}
  784 +
  785 +int
  786 +QPDF::getExtensionLevel()
  787 +{
  788 + int result = 0;
  789 + QPDFObjectHandle obj = getRoot();
  790 + if (obj.hasKey("/Extensions")) {
  791 + obj = obj.getKey("/Extensions");
  792 + if (obj.isDictionary() && obj.hasKey("/ADBE")) {
  793 + obj = obj.getKey("/ADBE");
  794 + if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) {
  795 + obj = obj.getKey("/ExtensionLevel");
  796 + if (obj.isInteger()) {
  797 + result = obj.getIntValueAsInt();
  798 + }
  799 + }
  800 + }
  801 + }
  802 + return result;
  803 +}
  804 +
  805 +QPDFObjectHandle
  806 +QPDF::getTrailer()
  807 +{
  808 + return m->trailer;
  809 +}
  810 +
  811 +QPDFObjectHandle
  812 +QPDF::getRoot()
  813 +{
  814 + QPDFObjectHandle root = m->trailer.getKey("/Root");
  815 + if (!root.isDictionary()) {
  816 + throw damagedPDF("", 0, "unable to find /Root dictionary");
  817 + } else if (
  818 + // Check_mode is an interim solution to request #810 pending a more comprehensive review of
  819 + // the approach to more extensive checks and warning levels.
  820 + m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
  821 + warn(damagedPDF("", 0, "catalog /Type entry missing or invalid"));
  822 + root.replaceKey("/Type", "/Catalog"_qpdf);
  823 + }
  824 + return root;
  825 +}
  826 +
  827 +std::map<QPDFObjGen, QPDFXRefEntry>
  828 +QPDF::getXRefTable()
  829 +{
  830 + return getXRefTableInternal();
  831 +}
  832 +
  833 +std::map<QPDFObjGen, QPDFXRefEntry> const&
  834 +QPDF::getXRefTableInternal()
  835 +{
  836 + if (!m->parsed) {
  837 + throw std::logic_error("QPDF::getXRefTable called before parsing.");
  838 + }
  839 +
  840 + return m->xref_table;
  841 +}
  842 +
  843 +bool
  844 +QPDF::pipeStreamData(
  845 + std::shared_ptr<EncryptionParameters> encp,
  846 + std::shared_ptr<InputSource> file,
  847 + QPDF& qpdf_for_warning,
  848 + QPDFObjGen og,
  849 + qpdf_offset_t offset,
  850 + size_t length,
  851 + QPDFObjectHandle stream_dict,
  852 + Pipeline* pipeline,
  853 + bool suppress_warnings,
  854 + bool will_retry)
  855 +{
  856 + std::unique_ptr<Pipeline> to_delete;
  857 + if (encp->encrypted) {
  858 + decryptStream(encp, file, qpdf_for_warning, pipeline, og, stream_dict, to_delete);
  859 + }
  860 +
  861 + bool attempted_finish = false;
  862 + try {
  863 + auto buf = file->read(length, offset);
  864 + if (buf.size() != length) {
  865 + throw damagedPDF(
  866 + *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data");
  867 + }
  868 + pipeline->write(buf.data(), length);
  869 + attempted_finish = true;
  870 + pipeline->finish();
  871 + return true;
  872 + } catch (QPDFExc& e) {
  873 + if (!suppress_warnings) {
  874 + qpdf_for_warning.warn(e);
  875 + }
  876 + } catch (std::exception& e) {
  877 + if (!suppress_warnings) {
  878 + QTC::TC("qpdf", "QPDF decoding error warning");
  879 + qpdf_for_warning.warn(
  880 + // line-break
  881 + damagedPDF(
  882 + *file,
  883 + "",
  884 + file->getLastOffset(),
  885 + ("error decoding stream data for object " + og.unparse(' ') + ": " +
  886 + e.what())));
  887 + if (will_retry) {
  888 + qpdf_for_warning.warn(
  889 + // line-break
  890 + damagedPDF(
  891 + *file,
  892 + "",
  893 + file->getLastOffset(),
  894 + "stream will be re-processed without filtering to avoid data loss"));
  895 + }
  896 + }
  897 + }
  898 + if (!attempted_finish) {
  899 + try {
  900 + pipeline->finish();
  901 + } catch (std::exception&) {
  902 + // ignore
  903 + }
  904 + }
  905 + return false;
  906 +}
  907 +
  908 +bool
  909 +QPDF::pipeStreamData(
  910 + QPDFObjGen og,
  911 + qpdf_offset_t offset,
  912 + size_t length,
  913 + QPDFObjectHandle stream_dict,
  914 + Pipeline* pipeline,
  915 + bool suppress_warnings,
  916 + bool will_retry)
  917 +{
  918 + return pipeStreamData(
  919 + m->encp,
  920 + m->file,
  921 + *this,
  922 + og,
  923 + offset,
  924 + length,
  925 + stream_dict,
  926 + pipeline,
  927 + suppress_warnings,
  928 + will_retry);
  929 +}
  930 +
  931 +bool
  932 +QPDF::pipeForeignStreamData(
  933 + std::shared_ptr<ForeignStreamData> foreign,
  934 + Pipeline* pipeline,
  935 + bool suppress_warnings,
  936 + bool will_retry)
  937 +{
  938 + if (foreign->encp->encrypted) {
  939 + QTC::TC("qpdf", "QPDF pipe foreign encrypted stream");
  940 + }
  941 + return pipeStreamData(
  942 + foreign->encp,
  943 + foreign->file,
  944 + *this,
  945 + foreign->foreign_og,
  946 + foreign->offset,
  947 + foreign->length,
  948 + foreign->local_dict,
  949 + pipeline,
  950 + suppress_warnings,
  951 + will_retry);
  952 +}
  953 +
  954 +// Throw a generic exception when we lack context for something more specific. New code should not
  955 +// use this. This method exists to improve somewhat from calling assert in very old code.
  956 +void
  957 +QPDF::stopOnError(std::string const& message)
  958 +{
  959 + throw damagedPDF("", message);
  960 +}
  961 +
  962 +// Return an exception of type qpdf_e_damaged_pdf.
  963 +QPDFExc
  964 +QPDF::damagedPDF(
  965 + InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message)
  966 +{
  967 + return {qpdf_e_damaged_pdf, input.getName(), object, offset, message};
  968 +}
  969 +
  970 +// Return an exception of type qpdf_e_damaged_pdf. The object is taken from
  971 +// m->last_object_description.
  972 +QPDFExc
  973 +QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message)
  974 +{
  975 + return damagedPDF(input, m->last_object_description, offset, message);
  976 +}
  977 +
  978 +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file.
  979 +QPDFExc
  980 +QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
  981 +{
  982 + return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message};
  983 +}
  984 +
  985 +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the
  986 +// offset from .m->file->getLastOffset().
  987 +QPDFExc
  988 +QPDF::damagedPDF(std::string const& object, std::string const& message)
  989 +{
  990 + return damagedPDF(object, m->file->getLastOffset(), message);
  991 +}
  992 +
  993 +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object
  994 +// from .m->last_object_description.
  995 +QPDFExc
  996 +QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)
  997 +{
  998 + return damagedPDF(m->last_object_description, offset, message);
  999 +}
  1000 +
  1001 +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object
  1002 +// from m->last_object_description and the offset from m->file->getLastOffset().
  1003 +QPDFExc
  1004 +QPDF::damagedPDF(std::string const& message)
  1005 +{
  1006 + return damagedPDF(m->last_object_description, m->file->getLastOffset(), message);
  1007 +}
  1008 +
  1009 +bool
  1010 +QPDF::everCalledGetAllPages() const
  1011 +{
  1012 + return m->ever_called_get_all_pages;
  1013 +}
  1014 +
  1015 +bool
  1016 +QPDF::everPushedInheritedAttributesToPages() const
  1017 +{
  1018 + return m->ever_pushed_inherited_attributes_to_pages;
  1019 +}
  1020 +
  1021 +void
  1022 +QPDF::removeSecurityRestrictions()
  1023 +{
  1024 + auto root = getRoot();
  1025 + root.removeKey("/Perms");
  1026 + auto acroform = root.getKey("/AcroForm");
  1027 + if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) {
  1028 + acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0));
  1029 + }
  1030 +}
... ...
libqpdf/QPDF_objects.cc
... ... @@ -28,29 +28,6 @@
28 28 using namespace qpdf;
29 29 using namespace std::literals;
30 30  
31   -// This must be a fixed value. This API returns a const reference to it, and the C API relies on its
32   -// being static as well.
33   -std::string const QPDF::qpdf_version(QPDF_VERSION);
34   -
35   -static char const* EMPTY_PDF = (
36   - // force line break
37   - "%PDF-1.3\n"
38   - "1 0 obj\n"
39   - "<< /Type /Catalog /Pages 2 0 R >>\n"
40   - "endobj\n"
41   - "2 0 obj\n"
42   - "<< /Type /Pages /Kids [] /Count 0 >>\n"
43   - "endobj\n"
44   - "xref\n"
45   - "0 3\n"
46   - "0000000000 65535 f \n"
47   - "0000000009 00000 n \n"
48   - "0000000058 00000 n \n"
49   - "trailer << /Size 3 /Root 1 0 R >>\n"
50   - "startxref\n"
51   - "110\n"
52   - "%%EOF\n");
53   -
54 31 namespace
55 32 {
56 33 class InvalidInputSource: public InputSource
... ... @@ -109,310 +86,6 @@ namespace
109 86 };
110 87 } // namespace
111 88  
112   -QPDF::ForeignStreamData::ForeignStreamData(
113   - std::shared_ptr<EncryptionParameters> encp,
114   - std::shared_ptr<InputSource> file,
115   - QPDFObjGen foreign_og,
116   - qpdf_offset_t offset,
117   - size_t length,
118   - QPDFObjectHandle local_dict) :
119   - encp(encp),
120   - file(file),
121   - foreign_og(foreign_og),
122   - offset(offset),
123   - length(length),
124   - local_dict(local_dict)
125   -{
126   -}
127   -
128   -QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) :
129   - QPDFObjectHandle::StreamDataProvider(true),
130   - destination_qpdf(destination_qpdf)
131   -{
132   -}
133   -
134   -bool
135   -QPDF::CopiedStreamDataProvider::provideStreamData(
136   - QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry)
137   -{
138   - std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og];
139   - bool result = false;
140   - if (foreign_data.get()) {
141   - result = destination_qpdf.pipeForeignStreamData(
142   - foreign_data, pipeline, suppress_warnings, will_retry);
143   - QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1);
144   - } else {
145   - auto foreign_stream = foreign_streams[og];
146   - result = foreign_stream.pipeStreamData(
147   - pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry);
148   - QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1);
149   - }
150   - return result;
151   -}
152   -
153   -void
154   -QPDF::CopiedStreamDataProvider::registerForeignStream(
155   - QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream)
156   -{
157   - this->foreign_streams[local_og] = foreign_stream;
158   -}
159   -
160   -void
161   -QPDF::CopiedStreamDataProvider::registerForeignStream(
162   - QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream)
163   -{
164   - this->foreign_stream_data[local_og] = foreign_stream;
165   -}
166   -
167   -QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen og) :
168   - qpdf(qpdf),
169   - og(og)
170   -{
171   -}
172   -
173   -std::string const&
174   -QPDF::QPDFVersion()
175   -{
176   - // The C API relies on this being a static value.
177   - return QPDF::qpdf_version;
178   -}
179   -
180   -QPDF::Members::Members() :
181   - log(QPDFLogger::defaultLogger()),
182   - file(new InvalidInputSource()),
183   - encp(new EncryptionParameters)
184   -{
185   -}
186   -
187   -QPDF::QPDF() :
188   - m(std::make_unique<Members>())
189   -{
190   - m->tokenizer.allowEOF();
191   - // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout
192   - // the lifetime of this running application.
193   - static std::atomic<unsigned long long> unique_id{0};
194   - m->unique_id = unique_id.fetch_add(1ULL);
195   -}
196   -
197   -// Provide access to disconnect(). Disconnect will in due course be merged into the current ObjCache
198   -// (future Objects::Entry) to centralize all QPDF access to QPDFObject.
199   -class Disconnect: BaseHandle
200   -{
201   - public:
202   - Disconnect(std::shared_ptr<QPDFObject> const& obj) :
203   - BaseHandle(obj)
204   - {
205   - }
206   - void
207   - disconnect()
208   - {
209   - BaseHandle::disconnect(false);
210   - if (raw_type_code() != ::ot_null) {
211   - obj->value = QPDF_Destroyed();
212   - }
213   - }
214   -};
215   -
216   -QPDF::~QPDF()
217   -{
218   - // If two objects are mutually referential (through each object having an array or dictionary
219   - // that contains an indirect reference to the other), the circular references in the
220   - // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects
221   - // in the object cache, which is those objects that we read from the file, and break all
222   - // resolved indirect references by replacing them with an internal object type representing that
223   - // they have been destroyed. Note that we can't break references like this at any time when the
224   - // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that
225   - // are reachable from this object to release their association with this QPDF. Direct objects
226   - // are not destroyed since they can be moved to other QPDF objects safely.
227   -
228   - // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear
229   - // the xref table anyway just to prevent any possibility of resolve() succeeding.
230   - m->xref_table.clear();
231   - for (auto const& iter: m->obj_cache) {
232   - Disconnect(iter.second.object).disconnect();
233   - }
234   -}
235   -
236   -std::shared_ptr<QPDF>
237   -QPDF::create()
238   -{
239   - return std::make_shared<QPDF>();
240   -}
241   -
242   -void
243   -QPDF::processFile(char const* filename, char const* password)
244   -{
245   - auto* fi = new FileInputSource(filename);
246   - processInputSource(std::shared_ptr<InputSource>(fi), password);
247   -}
248   -
249   -void
250   -QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password)
251   -{
252   - auto* fi = new FileInputSource(description, filep, close_file);
253   - processInputSource(std::shared_ptr<InputSource>(fi), password);
254   -}
255   -
256   -void
257   -QPDF::processMemoryFile(
258   - char const* description, char const* buf, size_t length, char const* password)
259   -{
260   - processInputSource(
261   - std::shared_ptr<InputSource>(
262   - // line-break
263   - new BufferInputSource(
264   - description, new Buffer(QUtil::unsigned_char_pointer(buf), length), true)),
265   - password);
266   -}
267   -
268   -void
269   -QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password)
270   -{
271   - m->file = source;
272   - parse(password);
273   -}
274   -
275   -void
276   -QPDF::closeInputSource()
277   -{
278   - m->file = std::shared_ptr<InputSource>(new InvalidInputSource());
279   -}
280   -
281   -void
282   -QPDF::setPasswordIsHexKey(bool val)
283   -{
284   - m->provided_password_is_hex_key = val;
285   -}
286   -
287   -void
288   -QPDF::emptyPDF()
289   -{
290   - processMemoryFile("empty PDF", EMPTY_PDF, strlen(EMPTY_PDF));
291   -}
292   -
293   -void
294   -QPDF::registerStreamFilter(
295   - std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)
296   -{
297   - qpdf::Stream::registerStreamFilter(filter_name, factory);
298   -}
299   -
300   -void
301   -QPDF::setIgnoreXRefStreams(bool val)
302   -{
303   - m->ignore_xref_streams = val;
304   -}
305   -
306   -std::shared_ptr<QPDFLogger>
307   -QPDF::getLogger()
308   -{
309   - return m->log;
310   -}
311   -
312   -void
313   -QPDF::setLogger(std::shared_ptr<QPDFLogger> l)
314   -{
315   - m->log = l;
316   -}
317   -
318   -void
319   -QPDF::setOutputStreams(std::ostream* out, std::ostream* err)
320   -{
321   - setLogger(QPDFLogger::create());
322   - m->log->setOutputStreams(out, err);
323   -}
324   -
325   -void
326   -QPDF::setSuppressWarnings(bool val)
327   -{
328   - m->suppress_warnings = val;
329   -}
330   -
331   -void
332   -QPDF::setMaxWarnings(size_t val)
333   -{
334   - m->max_warnings = val;
335   -}
336   -
337   -void
338   -QPDF::setAttemptRecovery(bool val)
339   -{
340   - m->attempt_recovery = val;
341   -}
342   -
343   -void
344   -QPDF::setImmediateCopyFrom(bool val)
345   -{
346   - m->immediate_copy_from = val;
347   -}
348   -
349   -std::vector<QPDFExc>
350   -QPDF::getWarnings()
351   -{
352   - std::vector<QPDFExc> result = m->warnings;
353   - m->warnings.clear();
354   - return result;
355   -}
356   -
357   -bool
358   -QPDF::anyWarnings() const
359   -{
360   - return !m->warnings.empty();
361   -}
362   -
363   -size_t
364   -QPDF::numWarnings() const
365   -{
366   - return m->warnings.size();
367   -}
368   -
369   -bool
370   -QPDF::validatePDFVersion(char const*& p, std::string& version)
371   -{
372   - bool valid = util::is_digit(*p);
373   - if (valid) {
374   - while (util::is_digit(*p)) {
375   - version.append(1, *p++);
376   - }
377   - if ((*p == '.') && util::is_digit(*(p + 1))) {
378   - version.append(1, *p++);
379   - while (util::is_digit(*p)) {
380   - version.append(1, *p++);
381   - }
382   - } else {
383   - valid = false;
384   - }
385   - }
386   - return valid;
387   -}
388   -
389   -bool
390   -QPDF::findHeader()
391   -{
392   - qpdf_offset_t global_offset = m->file->tell();
393   - std::string line = m->file->readLine(1024);
394   - char const* p = line.c_str();
395   - if (strncmp(p, "%PDF-", 5) != 0) {
396   - throw std::logic_error("findHeader is not looking at %PDF-");
397   - }
398   - p += 5;
399   - std::string version;
400   - // Note: The string returned by line.c_str() is always null-terminated. The code below never
401   - // overruns the buffer because a null character always short-circuits further advancement.
402   - bool valid = validatePDFVersion(p, version);
403   - if (valid) {
404   - m->pdf_version = version;
405   - if (global_offset != 0) {
406   - // Empirical evidence strongly suggests that when there is leading material prior to the
407   - // PDF header, all explicit offsets in the file are such that 0 points to the beginning
408   - // of the header.
409   - QTC::TC("qpdf", "QPDF global offset");
410   - m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset));
411   - }
412   - }
413   - return valid;
414   -}
415   -
416 89 bool
417 90 QPDF::findStartxref()
418 91 {
... ... @@ -500,28 +173,6 @@ QPDF::inParse(bool v)
500 173 }
501 174  
502 175 void
503   -QPDF::warn(QPDFExc const& e)
504   -{
505   - if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) {
506   - stopOnError("Too many warnings - file is too badly damaged");
507   - }
508   - m->warnings.push_back(e);
509   - if (!m->suppress_warnings) {
510   - *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";
511   - }
512   -}
513   -
514   -void
515   -QPDF::warn(
516   - qpdf_error_code_e error_code,
517   - std::string const& object,
518   - qpdf_offset_t offset,
519   - std::string const& message)
520   -{
521   - warn(QPDFExc(error_code, getFilename(), object, offset, message));
522   -}
523   -
524   -void
525 176 QPDF::setTrailer(QPDFObjectHandle obj)
526 177 {
527 178 if (m->trailer) {
... ... @@ -2135,41 +1786,6 @@ QPDF::makeIndirectObject(QPDFObjectHandle oh)
2135 1786 return makeIndirectFromQPDFObject(oh.getObj());
2136 1787 }
2137 1788  
2138   -QPDFObjectHandle
2139   -QPDF::newReserved()
2140   -{
2141   - return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Reserved>());
2142   -}
2143   -
2144   -QPDFObjectHandle
2145   -QPDF::newIndirectNull()
2146   -{
2147   - return makeIndirectFromQPDFObject(QPDFObject::create<QPDF_Null>());
2148   -}
2149   -
2150   -QPDFObjectHandle
2151   -QPDF::newStream()
2152   -{
2153   - return makeIndirectObject(
2154   - qpdf::Stream(*this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0));
2155   -}
2156   -
2157   -QPDFObjectHandle
2158   -QPDF::newStream(std::shared_ptr<Buffer> data)
2159   -{
2160   - auto result = newStream();
2161   - result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
2162   - return result;
2163   -}
2164   -
2165   -QPDFObjectHandle
2166   -QPDF::newStream(std::string const& data)
2167   -{
2168   - auto result = newStream();
2169   - result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
2170   - return result;
2171   -}
2172   -
2173 1789 std::shared_ptr<QPDFObject>
2174 1790 QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
2175 1791 {
... ... @@ -2216,24 +1832,6 @@ QPDF::getObject(QPDFObjGen og)
2216 1832 }
2217 1833 }
2218 1834  
2219   -QPDFObjectHandle
2220   -QPDF::getObject(int objid, int generation)
2221   -{
2222   - return getObject(QPDFObjGen(objid, generation));
2223   -}
2224   -
2225   -QPDFObjectHandle
2226   -QPDF::getObjectByObjGen(QPDFObjGen og)
2227   -{
2228   - return getObject(og);
2229   -}
2230   -
2231   -QPDFObjectHandle
2232   -QPDF::getObjectByID(int objid, int generation)
2233   -{
2234   - return getObject(QPDFObjGen(objid, generation));
2235   -}
2236   -
2237 1835 void
2238 1836 QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh)
2239 1837 {
... ... @@ -2273,265 +1871,6 @@ QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement)
2273 1871 replaceObject(reserved.getObjGen(), replacement);
2274 1872 }
2275 1873  
2276   -QPDFObjectHandle
2277   -QPDF::copyForeignObject(QPDFObjectHandle foreign)
2278   -{
2279   - // Here's an explanation of what's going on here.
2280   - //
2281   - // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and
2282   - // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a
2283   - // foreign QPDF into the local QPDF, we have to replace all indirect object references with
2284   - // references to the corresponding object in the local file.
2285   - //
2286   - // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign
2287   - // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a
2288   - // mapping from the foreign ObjGen to the local QPDFObjectHandle.
2289   - //
2290   - // To copy, we do a deep traversal of the foreign object with loop detection to discover all
2291   - // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an
2292   - // indirect object, we check to see if we have already created a local copy of it. If not, we
2293   - // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the
2294   - // mapping from the foreign object ID to the new object. While we
2295   - // do this, we keep a list of objects to copy.
2296   - //
2297   - // Once we are done with the traversal, we copy all the objects that we need to copy. However,
2298   - // the copies will contain indirect object IDs that refer to objects in the foreign file. We
2299   - // need to replace them with references to objects in the local file. This is what
2300   - // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with
2301   - // all the indirect references replaced with new ones in the local context, we can replace the
2302   - // local reserved object with the copy. This mechanism allows us to copy objects with circular
2303   - // references in any order.
2304   -
2305   - // For streams, rather than copying the objects, we set up the stream data to pull from the
2306   - // original stream by using a stream data provider. This is done in a manner that doesn't
2307   - // require the original QPDF object but may require the original source of the stream data with
2308   - // special handling for immediate_copy_from. This logic is also in
2309   - // replaceForeignIndirectObjects.
2310   -
2311   - // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented
2312   - // use case to copy pages this way if the intention is to not update the pages tree.
2313   - if (!foreign.isIndirect()) {
2314   - QTC::TC("qpdf", "QPDF copyForeign direct");
2315   - throw std::logic_error("QPDF::copyForeign called with direct object handle");
2316   - }
2317   - QPDF& other = foreign.getQPDF();
2318   - if (&other == this) {
2319   - QTC::TC("qpdf", "QPDF copyForeign not foreign");
2320   - throw std::logic_error("QPDF::copyForeign called with object from this QPDF");
2321   - }
2322   -
2323   - ObjCopier& obj_copier = m->object_copiers[other.m->unique_id];
2324   - if (!obj_copier.visiting.empty()) {
2325   - throw std::logic_error(
2326   - "obj_copier.visiting is not empty at the beginning of copyForeignObject");
2327   - }
2328   -
2329   - // Make sure we have an object in this file for every referenced object in the old file.
2330   - // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we
2331   - // have to copy, the local object will be a reservation, unless it is a stream, in which case
2332   - // the local object will already be a stream.
2333   - reserveObjects(foreign, obj_copier, true);
2334   -
2335   - if (!obj_copier.visiting.empty()) {
2336   - throw std::logic_error("obj_copier.visiting is not empty after reserving objects");
2337   - }
2338   -
2339   - // Copy any new objects and replace the reservations.
2340   - for (auto& to_copy: obj_copier.to_copy) {
2341   - QPDFObjectHandle copy = replaceForeignIndirectObjects(to_copy, obj_copier, true);
2342   - if (!to_copy.isStream()) {
2343   - QPDFObjGen og(to_copy.getObjGen());
2344   - replaceReserved(obj_copier.object_map[og], copy);
2345   - }
2346   - }
2347   - obj_copier.to_copy.clear();
2348   -
2349   - auto og = foreign.getObjGen();
2350   - if (!obj_copier.object_map.count(og)) {
2351   - warn(damagedPDF(
2352   - other.getFilename() + " object " + og.unparse(' '),
2353   - foreign.getParsedOffset(),
2354   - "unexpected reference to /Pages object while copying foreign object; replacing with "
2355   - "null"));
2356   - return QPDFObjectHandle::newNull();
2357   - }
2358   - return obj_copier.object_map[foreign.getObjGen()];
2359   -}
2360   -
2361   -void
2362   -QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
2363   -{
2364   - auto foreign_tc = foreign.getTypeCode();
2365   - if (foreign_tc == ::ot_reserved) {
2366   - throw std::logic_error("QPDF: attempting to copy a foreign reserved object");
2367   - }
2368   -
2369   - if (foreign.isPagesObject()) {
2370   - QTC::TC("qpdf", "QPDF not copying pages object");
2371   - return;
2372   - }
2373   -
2374   - if (foreign.isIndirect()) {
2375   - QPDFObjGen foreign_og(foreign.getObjGen());
2376   - if (!obj_copier.visiting.add(foreign_og)) {
2377   - QTC::TC("qpdf", "QPDF loop reserving objects");
2378   - return;
2379   - }
2380   - if (obj_copier.object_map.count(foreign_og) > 0) {
2381   - QTC::TC("qpdf", "QPDF already reserved object");
2382   - if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) {
2383   - obj_copier.visiting.erase(foreign);
2384   - return;
2385   - }
2386   - } else {
2387   - QTC::TC("qpdf", "QPDF copy indirect");
2388   - obj_copier.object_map[foreign_og] =
2389   - foreign.isStream() ? newStream() : newIndirectNull();
2390   - if ((!top) && foreign.isPageObject()) {
2391   - QTC::TC("qpdf", "QPDF not crossing page boundary");
2392   - obj_copier.visiting.erase(foreign_og);
2393   - return;
2394   - }
2395   - }
2396   - obj_copier.to_copy.push_back(foreign);
2397   - }
2398   -
2399   - if (foreign_tc == ::ot_array) {
2400   - QTC::TC("qpdf", "QPDF reserve array");
2401   - for (auto const& item: foreign.as_array()) {
2402   - reserveObjects(item, obj_copier, false);
2403   - }
2404   - } else if (foreign_tc == ::ot_dictionary) {
2405   - QTC::TC("qpdf", "QPDF reserve dictionary");
2406   - for (auto const& item: foreign.as_dictionary()) {
2407   - if (!item.second.null()) {
2408   - reserveObjects(item.second, obj_copier, false);
2409   - }
2410   - }
2411   - } else if (foreign_tc == ::ot_stream) {
2412   - QTC::TC("qpdf", "QPDF reserve stream");
2413   - reserveObjects(foreign.getDict(), obj_copier, false);
2414   - }
2415   -
2416   - obj_copier.visiting.erase(foreign);
2417   -}
2418   -
2419   -QPDFObjectHandle
2420   -QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)
2421   -{
2422   - auto foreign_tc = foreign.getTypeCode();
2423   - QPDFObjectHandle result;
2424   - if ((!top) && foreign.isIndirect()) {
2425   - QTC::TC("qpdf", "QPDF replace indirect");
2426   - auto mapping = obj_copier.object_map.find(foreign.getObjGen());
2427   - if (mapping == obj_copier.object_map.end()) {
2428   - // This case would occur if this is a reference to a Pages object that we didn't
2429   - // traverse into.
2430   - QTC::TC("qpdf", "QPDF replace foreign indirect with null");
2431   - result = QPDFObjectHandle::newNull();
2432   - } else {
2433   - result = mapping->second;
2434   - }
2435   - } else if (foreign_tc == ::ot_array) {
2436   - QTC::TC("qpdf", "QPDF replace array");
2437   - result = QPDFObjectHandle::newArray();
2438   - for (auto const& item: foreign.as_array()) {
2439   - result.appendItem(replaceForeignIndirectObjects(item, obj_copier, false));
2440   - }
2441   - } else if (foreign_tc == ::ot_dictionary) {
2442   - QTC::TC("qpdf", "QPDF replace dictionary");
2443   - result = QPDFObjectHandle::newDictionary();
2444   - for (auto const& [key, value]: foreign.as_dictionary()) {
2445   - if (!value.null()) {
2446   - result.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false));
2447   - }
2448   - }
2449   - } else if (foreign_tc == ::ot_stream) {
2450   - QTC::TC("qpdf", "QPDF replace stream");
2451   - result = obj_copier.object_map[foreign.getObjGen()];
2452   - QPDFObjectHandle dict = result.getDict();
2453   - QPDFObjectHandle old_dict = foreign.getDict();
2454   - for (auto const& [key, value]: old_dict.as_dictionary()) {
2455   - if (!value.null()) {
2456   - dict.replaceKey(key, replaceForeignIndirectObjects(value, obj_copier, false));
2457   - }
2458   - }
2459   - copyStreamData(result, foreign);
2460   - } else {
2461   - foreign.assertScalar();
2462   - result = foreign;
2463   - result.makeDirect();
2464   - }
2465   -
2466   - if (top && (!result.isStream()) && result.isIndirect()) {
2467   - throw std::logic_error("replacement for foreign object is indirect");
2468   - }
2469   -
2470   - return result;
2471   -}
2472   -
2473   -void
2474   -QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
2475   -{
2476   - // This method was originally written for copying foreign streams, but it is used by
2477   - // QPDFObjectHandle to copy streams from the same QPDF object as well.
2478   -
2479   - QPDFObjectHandle dict = result.getDict();
2480   - QPDFObjectHandle old_dict = foreign.getDict();
2481   - if (m->copied_stream_data_provider == nullptr) {
2482   - m->copied_stream_data_provider = new CopiedStreamDataProvider(*this);
2483   - m->copied_streams =
2484   - std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);
2485   - }
2486   - QPDFObjGen local_og(result.getObjGen());
2487   - // Copy information from the foreign stream so we can pipe its data later without keeping the
2488   - // original QPDF object around.
2489   -
2490   - QPDF& foreign_stream_qpdf =
2491   - foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");
2492   -
2493   - auto stream = foreign.as_stream();
2494   - if (!stream) {
2495   - throw std::logic_error("unable to retrieve underlying stream object from foreign stream");
2496   - }
2497   - std::shared_ptr<Buffer> stream_buffer = stream.getStreamDataBuffer();
2498   - if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {
2499   - // Pull the stream data into a buffer before attempting the copy operation. Do it on the
2500   - // source stream so that if the source stream is copied multiple times, we don't have to
2501   - // keep duplicating the memory.
2502   - QTC::TC("qpdf", "QPDF immediate copy stream data");
2503   - foreign.replaceStreamData(
2504   - foreign.getRawStreamData(),
2505   - old_dict.getKey("/Filter"),
2506   - old_dict.getKey("/DecodeParms"));
2507   - stream_buffer = stream.getStreamDataBuffer();
2508   - }
2509   - std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider =
2510   - stream.getStreamDataProvider();
2511   - if (stream_buffer.get()) {
2512   - QTC::TC("qpdf", "QPDF copy foreign stream with buffer");
2513   - result.replaceStreamData(
2514   - stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
2515   - } else if (stream_provider.get()) {
2516   - // In this case, the remote stream's QPDF must stay in scope.
2517   - QTC::TC("qpdf", "QPDF copy foreign stream with provider");
2518   - m->copied_stream_data_provider->registerForeignStream(local_og, foreign);
2519   - result.replaceStreamData(
2520   - m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
2521   - } else {
2522   - auto foreign_stream_data = std::make_shared<ForeignStreamData>(
2523   - foreign_stream_qpdf.m->encp,
2524   - foreign_stream_qpdf.m->file,
2525   - foreign,
2526   - foreign.getParsedOffset(),
2527   - stream.getLength(),
2528   - dict);
2529   - m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data);
2530   - result.replaceStreamData(
2531   - m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));
2532   - }
2533   -}
2534   -
2535 1874 void
2536 1875 QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
2537 1876 {
... ... @@ -2547,99 +1886,6 @@ QPDF::swapObjects(QPDFObjGen og1, QPDFObjGen og2)
2547 1886 m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object);
2548 1887 }
2549 1888  
2550   -unsigned long long
2551   -QPDF::getUniqueId() const
2552   -{
2553   - return m->unique_id;
2554   -}
2555   -
2556   -std::string
2557   -QPDF::getFilename() const
2558   -{
2559   - return m->file->getName();
2560   -}
2561   -
2562   -PDFVersion
2563   -QPDF::getVersionAsPDFVersion()
2564   -{
2565   - int major = 1;
2566   - int minor = 3;
2567   - int extension_level = getExtensionLevel();
2568   -
2569   - std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)");
2570   - std::smatch match;
2571   - if (std::regex_search(m->pdf_version, match, v)) {
2572   - major = QUtil::string_to_int(match[1].str().c_str());
2573   - minor = QUtil::string_to_int(match[2].str().c_str());
2574   - }
2575   -
2576   - return {major, minor, extension_level};
2577   -}
2578   -
2579   -std::string
2580   -QPDF::getPDFVersion() const
2581   -{
2582   - return m->pdf_version;
2583   -}
2584   -
2585   -int
2586   -QPDF::getExtensionLevel()
2587   -{
2588   - int result = 0;
2589   - QPDFObjectHandle obj = getRoot();
2590   - if (obj.hasKey("/Extensions")) {
2591   - obj = obj.getKey("/Extensions");
2592   - if (obj.isDictionary() && obj.hasKey("/ADBE")) {
2593   - obj = obj.getKey("/ADBE");
2594   - if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) {
2595   - obj = obj.getKey("/ExtensionLevel");
2596   - if (obj.isInteger()) {
2597   - result = obj.getIntValueAsInt();
2598   - }
2599   - }
2600   - }
2601   - }
2602   - return result;
2603   -}
2604   -
2605   -QPDFObjectHandle
2606   -QPDF::getTrailer()
2607   -{
2608   - return m->trailer;
2609   -}
2610   -
2611   -QPDFObjectHandle
2612   -QPDF::getRoot()
2613   -{
2614   - QPDFObjectHandle root = m->trailer.getKey("/Root");
2615   - if (!root.isDictionary()) {
2616   - throw damagedPDF("", 0, "unable to find /Root dictionary");
2617   - } else if (
2618   - // Check_mode is an interim solution to request #810 pending a more comprehensive review of
2619   - // the approach to more extensive checks and warning levels.
2620   - m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
2621   - warn(damagedPDF("", 0, "catalog /Type entry missing or invalid"));
2622   - root.replaceKey("/Type", "/Catalog"_qpdf);
2623   - }
2624   - return root;
2625   -}
2626   -
2627   -std::map<QPDFObjGen, QPDFXRefEntry>
2628   -QPDF::getXRefTable()
2629   -{
2630   - return getXRefTableInternal();
2631   -}
2632   -
2633   -std::map<QPDFObjGen, QPDFXRefEntry> const&
2634   -QPDF::getXRefTableInternal()
2635   -{
2636   - if (!m->parsed) {
2637   - throw std::logic_error("QPDF::getXRefTable called before parsing.");
2638   - }
2639   -
2640   - return m->xref_table;
2641   -}
2642   -
2643 1889 size_t
2644 1890 QPDF::tableSize()
2645 1891 {
... ... @@ -2769,192 +2015,3 @@ QPDF::getCompressibleObjGens()
2769 2015  
2770 2016 return result;
2771 2017 }
2772   -
2773   -bool
2774   -QPDF::pipeStreamData(
2775   - std::shared_ptr<EncryptionParameters> encp,
2776   - std::shared_ptr<InputSource> file,
2777   - QPDF& qpdf_for_warning,
2778   - QPDFObjGen og,
2779   - qpdf_offset_t offset,
2780   - size_t length,
2781   - QPDFObjectHandle stream_dict,
2782   - Pipeline* pipeline,
2783   - bool suppress_warnings,
2784   - bool will_retry)
2785   -{
2786   - std::unique_ptr<Pipeline> to_delete;
2787   - if (encp->encrypted) {
2788   - decryptStream(encp, file, qpdf_for_warning, pipeline, og, stream_dict, to_delete);
2789   - }
2790   -
2791   - bool attempted_finish = false;
2792   - try {
2793   - auto buf = file->read(length, offset);
2794   - if (buf.size() != length) {
2795   - throw damagedPDF(
2796   - *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data");
2797   - }
2798   - pipeline->write(buf.data(), length);
2799   - attempted_finish = true;
2800   - pipeline->finish();
2801   - return true;
2802   - } catch (QPDFExc& e) {
2803   - if (!suppress_warnings) {
2804   - qpdf_for_warning.warn(e);
2805   - }
2806   - } catch (std::exception& e) {
2807   - if (!suppress_warnings) {
2808   - QTC::TC("qpdf", "QPDF decoding error warning");
2809   - qpdf_for_warning.warn(
2810   - // line-break
2811   - damagedPDF(
2812   - *file,
2813   - "",
2814   - file->getLastOffset(),
2815   - ("error decoding stream data for object " + og.unparse(' ') + ": " +
2816   - e.what())));
2817   - if (will_retry) {
2818   - qpdf_for_warning.warn(
2819   - // line-break
2820   - damagedPDF(
2821   - *file,
2822   - "",
2823   - file->getLastOffset(),
2824   - "stream will be re-processed without filtering to avoid data loss"));
2825   - }
2826   - }
2827   - }
2828   - if (!attempted_finish) {
2829   - try {
2830   - pipeline->finish();
2831   - } catch (std::exception&) {
2832   - // ignore
2833   - }
2834   - }
2835   - return false;
2836   -}
2837   -
2838   -bool
2839   -QPDF::pipeStreamData(
2840   - QPDFObjGen og,
2841   - qpdf_offset_t offset,
2842   - size_t length,
2843   - QPDFObjectHandle stream_dict,
2844   - Pipeline* pipeline,
2845   - bool suppress_warnings,
2846   - bool will_retry)
2847   -{
2848   - return pipeStreamData(
2849   - m->encp,
2850   - m->file,
2851   - *this,
2852   - og,
2853   - offset,
2854   - length,
2855   - stream_dict,
2856   - pipeline,
2857   - suppress_warnings,
2858   - will_retry);
2859   -}
2860   -
2861   -bool
2862   -QPDF::pipeForeignStreamData(
2863   - std::shared_ptr<ForeignStreamData> foreign,
2864   - Pipeline* pipeline,
2865   - bool suppress_warnings,
2866   - bool will_retry)
2867   -{
2868   - if (foreign->encp->encrypted) {
2869   - QTC::TC("qpdf", "QPDF pipe foreign encrypted stream");
2870   - }
2871   - return pipeStreamData(
2872   - foreign->encp,
2873   - foreign->file,
2874   - *this,
2875   - foreign->foreign_og,
2876   - foreign->offset,
2877   - foreign->length,
2878   - foreign->local_dict,
2879   - pipeline,
2880   - suppress_warnings,
2881   - will_retry);
2882   -}
2883   -
2884   -// Throw a generic exception when we lack context for something more specific. New code should not
2885   -// use this. This method exists to improve somewhat from calling assert in very old code.
2886   -void
2887   -QPDF::stopOnError(std::string const& message)
2888   -{
2889   - throw damagedPDF("", message);
2890   -}
2891   -
2892   -// Return an exception of type qpdf_e_damaged_pdf.
2893   -QPDFExc
2894   -QPDF::damagedPDF(
2895   - InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message)
2896   -{
2897   - return {qpdf_e_damaged_pdf, input.getName(), object, offset, message};
2898   -}
2899   -
2900   -// Return an exception of type qpdf_e_damaged_pdf. The object is taken from
2901   -// m->last_object_description.
2902   -QPDFExc
2903   -QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message)
2904   -{
2905   - return damagedPDF(input, m->last_object_description, offset, message);
2906   -}
2907   -
2908   -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file.
2909   -QPDFExc
2910   -QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
2911   -{
2912   - return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message};
2913   -}
2914   -
2915   -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the
2916   -// offset from .m->file->getLastOffset().
2917   -QPDFExc
2918   -QPDF::damagedPDF(std::string const& object, std::string const& message)
2919   -{
2920   - return damagedPDF(object, m->file->getLastOffset(), message);
2921   -}
2922   -
2923   -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object
2924   -// from .m->last_object_description.
2925   -QPDFExc
2926   -QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)
2927   -{
2928   - return damagedPDF(m->last_object_description, offset, message);
2929   -}
2930   -
2931   -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object
2932   -// from m->last_object_description and the offset from m->file->getLastOffset().
2933   -QPDFExc
2934   -QPDF::damagedPDF(std::string const& message)
2935   -{
2936   - return damagedPDF(m->last_object_description, m->file->getLastOffset(), message);
2937   -}
2938   -
2939   -bool
2940   -QPDF::everCalledGetAllPages() const
2941   -{
2942   - return m->ever_called_get_all_pages;
2943   -}
2944   -
2945   -bool
2946   -QPDF::everPushedInheritedAttributesToPages() const
2947   -{
2948   - return m->ever_pushed_inherited_attributes_to_pages;
2949   -}
2950   -
2951   -void
2952   -QPDF::removeSecurityRestrictions()
2953   -{
2954   - auto root = getRoot();
2955   - root.removeKey("/Perms");
2956   - auto acroform = root.getKey("/AcroForm");
2957   - if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) {
2958   - acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0));
2959   - }
2960   -}
... ...
manual/release-notes.rst
... ... @@ -21,16 +21,15 @@ more detail.
21 21 integer object. Previously the method returned false if the first
22 22 dictionary object was not a linearization parameter dictionary.
23 23  
24   -.. _r12-0-0:
25   -
26   -12.0.1: not yet released
27   - - Other enhancements
  24 + - Other enhancements
28 25  
29   - - There have been further enhancements to how files with damaged xref
30   - tables are recovered.
  26 + - There have been further enhancements to how files with damaged xref
  27 + tables are recovered.
31 28  
32 29 .. cSpell:ignore substract
33 30  
  31 +.. _r12-0-0:
  32 +
34 33 12.0.0: March 9, 2025
35 34 - API breaking changes
36 35  
... ...