Commit 675777bb2721eafe4c2cf0435166a8e4221de1bc

Authored by m-holger
1 parent 66efa438

Add `inspection_mode` for optional restricted PDF inspection

Introduce `qpdf::global::inspection_mode` for analyzing and repairing damaged PDFs with a restricted feature set. Update relevant code to enforce restrictions and add supporting test cases.
include/qpdf/Constants.h
... ... @@ -277,10 +277,11 @@ enum qpdf_param_e {
277 277 qpdf_p_limit_errors = 0x10020,
278 278  
279 279 /* global options */
  280 + qpdf_p_inspection_mode = 0x11000,
280 281 qpdf_p_default_limits = 0x11100,
281 282 /* global limits */
282 283  
283   - /* object - parser limits */
  284 + /* parser limits */
284 285 qpdf_p_parser_max_nesting = 0x13000,
285 286 qpdf_p_parser_max_errors,
286 287 qpdf_p_parser_max_container_size,
... ...
include/qpdf/global.hh
... ... @@ -68,6 +68,30 @@ namespace qpdf::global
68 68  
69 69 namespace options
70 70 {
  71 + /// @brief Retrieves whether inspection mode is set.
  72 + ///
  73 + /// @return True if inspection mode is set.
  74 + ///
  75 + /// @since 12.3
  76 + bool inline inspection_mode()
  77 + {
  78 + return get_uint32(qpdf_p_inspection_mode) != 0;
  79 + }
  80 +
  81 + /// @brief Set inspection mode if `true` is passed.
  82 + ///
  83 + /// This function enables restrictive inspection mode if `true` is passed. Inspection mode
  84 + /// must be enabled before a QPDF object is created. By default inspection mode is off.
  85 + /// Calling `inspection_mode(false)` is not supported and currently is a no-op.
  86 + ///
  87 + /// @param value A boolean indicating whether to enable (true) inspection mode.
  88 + ///
  89 + /// @since 12.3
  90 + void inline inspection_mode(bool value)
  91 + {
  92 + set_uint32(qpdf_p_inspection_mode, value ? QPDF_TRUE : QPDF_FALSE);
  93 + }
  94 +
71 95 /// @brief Retrieves whether default limits are enabled.
72 96 ///
73 97 /// @return True if default limits are enabled.
... ...
libqpdf/QPDF_encryption.cc
... ... @@ -758,7 +758,7 @@ QPDF::EncryptionParameters::initialize(QPDF& qpdf)
758 758 // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
759 759 // dictionary.
760 760  
761   - if (!trailer.hasKey("/Encrypt")) {
  761 + if (!trailer.contains("/Encrypt")) {
762 762 return;
763 763 }
764 764  
... ...
libqpdf/QPDF_objects.cc
... ... @@ -257,11 +257,21 @@ Objects::parse(char const* password)
257 257 throw damagedPDF("", -1, std::string("error reading xref: ") + e.what());
258 258 }
259 259 } catch (QPDFExc& e) {
260   - if (!cf.surpress_recovery()) {
261   - reconstruct_xref(e, xref_offset > 0);
262   - } else {
  260 + if (global::Options::inspection_mode()) {
  261 + try {
  262 + reconstruct_xref(e, xref_offset > 0);
  263 + } catch (std::exception& er) {
  264 + warn(damagedPDF("", -1, "error reconstructing xref: "s + er.what()));
  265 + }
  266 + if (!m->trailer) {
  267 + m->trailer = Dictionary::empty();
  268 + }
  269 + return;
  270 + }
  271 + if (cf.surpress_recovery()) {
263 272 throw;
264 273 }
  274 + reconstruct_xref(e, xref_offset > 0);
265 275 }
266 276  
267 277 m->encp->initialize(qpdf);
... ...
libqpdf/global.cc
... ... @@ -35,6 +35,9 @@ qpdf_global_get_uint32(qpdf_param_e param, uint32_t* value)
35 35 {
36 36 qpdf_expect(value);
37 37 switch (param) {
  38 + case qpdf_p_inspection_mode:
  39 + *value = Options::inspection_mode();
  40 + return qpdf_r_ok;
38 41 case qpdf_p_default_limits:
39 42 *value = Options::default_limits();
40 43 return qpdf_r_ok;
... ... @@ -62,6 +65,9 @@ qpdf_result_e
62 65 qpdf_global_set_uint32(qpdf_param_e param, uint32_t value)
63 66 {
64 67 switch (param) {
  68 + case qpdf_p_inspection_mode:
  69 + Options::inspection_mode(value);
  70 + return qpdf_r_ok;
65 71 case qpdf_p_default_limits:
66 72 Options::default_limits(value);
67 73 return qpdf_r_ok;
... ...
libqpdf/qpdf/QPDF_private.hh
... ... @@ -12,6 +12,9 @@
12 12 #include <qpdf/QPDFPageDocumentHelper.hh>
13 13 #include <qpdf/QPDFPageLabelDocumentHelper.hh>
14 14 #include <qpdf/QPDFTokenizer_private.hh>
  15 +#include <qpdf/global_private.hh>
  16 +
  17 +#include <exception>
15 18  
16 19 using namespace qpdf;
17 20  
... ... @@ -374,6 +377,7 @@ class QPDF::Doc
374 377 acroform()
375 378 {
376 379 if (!acroform_) {
  380 + no_inspection();
377 381 acroform_ = std::make_unique<QPDFAcroFormDocumentHelper>(qpdf);
378 382 }
379 383 return *acroform_;
... ... @@ -383,6 +387,7 @@ class QPDF::Doc
383 387 embedded_files()
384 388 {
385 389 if (!embedded_files_) {
  390 + no_inspection();
386 391 embedded_files_ = std::make_unique<QPDFEmbeddedFileDocumentHelper>(qpdf);
387 392 }
388 393 return *embedded_files_;
... ... @@ -392,6 +397,7 @@ class QPDF::Doc
392 397 outlines()
393 398 {
394 399 if (!outlines_) {
  400 + no_inspection();
395 401 outlines_ = std::make_unique<QPDFOutlineDocumentHelper>(qpdf);
396 402 }
397 403 return *outlines_;
... ... @@ -401,6 +407,7 @@ class QPDF::Doc
401 407 page_dh()
402 408 {
403 409 if (!page_dh_) {
  410 + no_inspection();
404 411 page_dh_ = std::make_unique<QPDFPageDocumentHelper>(qpdf);
405 412 }
406 413 return *page_dh_;
... ... @@ -410,12 +417,21 @@ class QPDF::Doc
410 417 page_labels()
411 418 {
412 419 if (!page_labels_) {
  420 + no_inspection();
413 421 page_labels_ = std::make_unique<QPDFPageLabelDocumentHelper>(qpdf);
414 422 }
415 423 return *page_labels_;
416 424 }
417 425  
418 426 protected:
  427 + void
  428 + no_inspection()
  429 + {
  430 + if (global::Options::inspection_mode()) {
  431 + throw std::logic_error("Attempted unsupported operation in inspection mode");
  432 + }
  433 + }
  434 +
419 435 QPDF& qpdf;
420 436 QPDF::Members* m;
421 437  
... ...
libqpdf/qpdf/global_private.hh
... ... @@ -85,6 +85,20 @@ namespace qpdf::global
85 85 {
86 86 public:
87 87 static bool
  88 + inspection_mode()
  89 + {
  90 + return static_cast<bool>(o.inspection_mode_);
  91 + }
  92 +
  93 + static void
  94 + inspection_mode(bool value)
  95 + {
  96 + if (value) {
  97 + o.inspection_mode_ = true;
  98 + }
  99 + }
  100 +
  101 + static bool
88 102 default_limits()
89 103 {
90 104 return static_cast<bool>(o.default_limits_);
... ... @@ -102,6 +116,7 @@ namespace qpdf::global
102 116 private:
103 117 static Options o;
104 118  
  119 + bool inspection_mode_{false};
105 120 bool default_limits_{true};
106 121 };
107 122 } // namespace qpdf::global
... ...
qpdf/qtest/inspection-mode.test 0 → 100644
  1 +#!/usr/bin/env perl
  2 +require 5.008;
  3 +use warnings;
  4 +use strict;
  5 +
  6 +unshift(@INC, '.');
  7 +require qpdf_test_helpers;
  8 +
  9 +chdir("qpdf") or die "chdir testdir failed: $!\n";
  10 +
  11 +require TestDriver;
  12 +
  13 +cleanup();
  14 +
  15 +my $td = new TestDriver('inspection-mode');
  16 +
  17 +my $n_tests = 1;
  18 +
  19 +$td->runtest("inspection mode",
  20 + {$td->COMMAND => "test_driver 101 - -"},
  21 + {$td->FILE => "inspection-mode.out", $td->EXIT_STATUS => 0},
  22 + $td->NORMALIZE_NEWLINES);
  23 +
  24 +cleanup();
  25 +$td->report($n_tests);
... ...
qpdf/qtest/qpdf/inspect.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/inspection-mode.out 0 → 100644
  1 +WARNING: inspect.pdf: can't find PDF header
  2 +WARNING: inspect.pdf: file is damaged
  3 +WARNING: inspect.pdf: can't find startxref
  4 +WARNING: inspect.pdf: Attempting to reconstruct cross-reference table
  5 +WARNING: inspect.pdf (trailer, offset 38): unknown token while reading object; treating as null
  6 +WARNING: inspect.pdf (trailer, offset 60): unknown token while reading object; treating as null
  7 +WARNING: inspect.pdf (trailer, offset 82): treating bad indirect reference (0 0 R) as null
  8 +WARNING: inspect.pdf (trailer, offset 90): unknown token while reading object; treating as null
  9 +WARNING: inspect.pdf (trailer, offset 100): unexpected >
  10 +WARNING: inspect.pdf (trailer, offset 71): expected dictionary keys but found non-name objects; ignoring
  11 +WARNING: inspect.pdf (trailer, offset 202): unexpected 'endobj' or 'endstream' while reading object; giving up on reading object
  12 +WARNING: inspect.pdf: error reconstructing xref: inspect.pdf: unable to find trailer dictionary while recovering damaged file
  13 +5 0
  14 +null
  15 +20 0
  16 +<< /Fields [ 21 0 R ] >>
  17 +21 0
  18 +<< /Kids [ 22 0 R 23 0 R 24 0 R 25 0 R ] /Rect [ 100 100 500 500 ] /Subtype /Widget /T (MyFie\224d) /Type /Annot >>
  19 +22 0
  20 +null
  21 +23 0
  22 +<< /FT /Tx /Rect [ 401 401 421 421 ] /Subtype /Widget /T (Sub_RightTop) /Type /Annot >>
  23 +24 0
  24 +<< /FT /Tx /Rect [ 201 400 221 420 ] /Subtype /Widget /T (Sub_LeftTop) /Type /Annot >>
  25 +25 0
  26 +<< /FT /Tx /Rect [ 400 201 420 221 ] /Subtype /Widget /T (Sub_RightBottom) /Type /Annot >>
  27 +test 101 done
... ...
qpdf/test_driver.cc
... ... @@ -3550,6 +3550,41 @@ test_100(QPDF& pdf, char const* arg2)
3550 3550 }
3551 3551 }
3552 3552  
  3553 +static void
  3554 +test_101(QPDF& pdf, char const* arg2)
  3555 +{
  3556 + // Test inspection mode
  3557 + QPDF qpdf;
  3558 + assert(!qpdf::global::options::inspection_mode());
  3559 + qpdf::global::options::inspection_mode(true);
  3560 + assert(qpdf::global::options::inspection_mode());
  3561 + qpdf::global::options::inspection_mode(false);
  3562 + // Setting inspection mode is irreversible
  3563 + assert(qpdf::global::options::inspection_mode());
  3564 + qpdf.processFile("inspect.pdf");
  3565 + for (auto& oh: qpdf.getAllObjects()) {
  3566 + std::cout << oh.getObjGen().unparse(' ') << '\n';
  3567 + std::cout << oh.unparseResolved() << '\n';
  3568 + }
  3569 +
  3570 +
  3571 + auto test_helper_throws = [&qpdf](auto helper_func) {
  3572 + bool thrown = false;
  3573 + try {
  3574 + helper_func(qpdf);
  3575 + } catch (std::logic_error&) {
  3576 + thrown = true;
  3577 + }
  3578 + assert(thrown);
  3579 + };
  3580 +
  3581 + test_helper_throws([](QPDF& q) { (void)QPDFAcroFormDocumentHelper::get(q); });
  3582 + test_helper_throws([](QPDF& q) { (void)QPDFEmbeddedFileDocumentHelper::get(q); });
  3583 + test_helper_throws([](QPDF& q) { (void)QPDFOutlineDocumentHelper::get(q); });
  3584 + test_helper_throws([](QPDF& q) { (void)QPDFPageDocumentHelper::get(q); });
  3585 + test_helper_throws([](QPDF& q) { (void)QPDFPageLabelDocumentHelper::get(q); });
  3586 +}
  3587 +
3553 3588 void
3554 3589 runtest(int n, char const* filename1, char const* arg2)
3555 3590 {
... ... @@ -3557,7 +3592,7 @@ runtest(int n, char const* filename1, char const* arg2)
3557 3592 // the test suite to see how the test is invoked to find the file
3558 3593 // that the test is supposed to operate on.
3559 3594  
3560   - std::set<int> ignore_filename = {61, 62, 81, 83, 84, 85, 86, 87, 92, 95, 96};
  3595 + std::set<int> ignore_filename = {61, 62, 81, 83, 84, 85, 86, 87, 92, 95, 96, 101};
3561 3596  
3562 3597 if (n == 0) {
3563 3598 // Throw in some random test cases that don't fit anywhere
... ... @@ -3631,23 +3666,27 @@ runtest(int n, char const* filename1, char const* arg2)
3631 3666 }
3632 3667  
3633 3668 std::map<int, void (*)(QPDF&, char const*)> test_functions = {
3634   - {0, test_0_1}, {1, test_0_1}, {2, test_2}, {3, test_3}, {4, test_4}, {5, test_5},
3635   - {6, test_6}, {7, test_7}, {8, test_8}, {9, test_9}, {10, test_10}, {11, test_11},
3636   - {12, test_12}, {13, test_13}, {14, test_14}, {15, test_15}, {16, test_16}, {17, test_17},
3637   - {18, test_18}, {19, test_19}, {20, test_20}, {21, test_21}, {22, test_22}, {23, test_23},
3638   - {24, test_24}, {25, test_25}, {26, test_26}, {27, test_27}, {28, test_28}, {29, test_29},
3639   - {30, test_30}, {31, test_31}, {32, test_32}, {33, test_33}, {34, test_34}, {35, test_35},
3640   - {36, test_36}, {37, test_37}, {38, test_38}, {39, test_39}, {40, test_40}, {41, test_41},
3641   - {42, test_42}, {43, test_43}, {44, test_44}, {45, test_45}, {46, test_46}, {47, test_47},
3642   - {48, test_48}, {49, test_49}, {50, test_50}, {51, test_51}, {52, test_52}, {53, test_53},
3643   - {54, test_54}, {55, test_55}, {56, test_56}, {57, test_57}, {58, test_58}, {59, test_59},
3644   - {60, test_60}, {61, test_61}, {62, test_62}, {63, test_63}, {64, test_64}, {65, test_65},
3645   - {66, test_66}, {67, test_67}, {68, test_68}, {69, test_69}, {70, test_70}, {71, test_71},
3646   - {72, test_72}, {73, test_73}, {74, test_74}, {75, test_75}, {76, test_76}, {77, test_77},
3647   - {78, test_78}, {79, test_79}, {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83},
3648   - {84, test_84}, {85, test_85}, {86, test_86}, {87, test_87}, {88, test_88}, {89, test_89},
3649   - {90, test_90}, {91, test_91}, {92, test_92}, {93, test_93}, {94, test_94}, {95, test_95},
3650   - {96, test_96}, {97, test_97}, {98, test_98}, {99, test_99}, {100, test_100}};
  3669 + {0, test_0_1}, {1, test_0_1}, {2, test_2}, {3, test_3}, {4, test_4},
  3670 + {5, test_5}, {6, test_6}, {7, test_7}, {8, test_8}, {9, test_9},
  3671 + {10, test_10}, {11, test_11}, {12, test_12}, {13, test_13}, {14, test_14},
  3672 + {15, test_15}, {16, test_16}, {17, test_17}, {18, test_18}, {19, test_19},
  3673 + {20, test_20}, {21, test_21}, {22, test_22}, {23, test_23}, {24, test_24},
  3674 + {25, test_25}, {26, test_26}, {27, test_27}, {28, test_28}, {29, test_29},
  3675 + {30, test_30}, {31, test_31}, {32, test_32}, {33, test_33}, {34, test_34},
  3676 + {35, test_35}, {36, test_36}, {37, test_37}, {38, test_38}, {39, test_39},
  3677 + {40, test_40}, {41, test_41}, {42, test_42}, {43, test_43}, {44, test_44},
  3678 + {45, test_45}, {46, test_46}, {47, test_47}, {48, test_48}, {49, test_49},
  3679 + {50, test_50}, {51, test_51}, {52, test_52}, {53, test_53}, {54, test_54},
  3680 + {55, test_55}, {56, test_56}, {57, test_57}, {58, test_58}, {59, test_59},
  3681 + {60, test_60}, {61, test_61}, {62, test_62}, {63, test_63}, {64, test_64},
  3682 + {65, test_65}, {66, test_66}, {67, test_67}, {68, test_68}, {69, test_69},
  3683 + {70, test_70}, {71, test_71}, {72, test_72}, {73, test_73}, {74, test_74},
  3684 + {75, test_75}, {76, test_76}, {77, test_77}, {78, test_78}, {79, test_79},
  3685 + {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83}, {84, test_84},
  3686 + {85, test_85}, {86, test_86}, {87, test_87}, {88, test_88}, {89, test_89},
  3687 + {90, test_90}, {91, test_91}, {92, test_92}, {93, test_93}, {94, test_94},
  3688 + {95, test_95}, {96, test_96}, {97, test_97}, {98, test_98}, {99, test_99},
  3689 + {100, test_100}, {101, test_101}};
3651 3690  
3652 3691 auto fn = test_functions.find(n);
3653 3692 if (fn == test_functions.end()) {
... ...