Commit 675777bb2721eafe4c2cf0435166a8e4221de1bc

Authored by m-holger
1 parent 66efa438

Add `inspection_mode` for optional restricted PDF inspection

Introduce `qpdf::global::options::inspection_mode` for analyzing and repairing damaged PDFs with a restricted feature set. Update relevant code to enforce restrictions and add supporting test cases.
include/qpdf/Constants.h
@@ -277,10 +277,11 @@ enum qpdf_param_e { @@ -277,10 +277,11 @@ enum qpdf_param_e {
277 qpdf_p_limit_errors = 0x10020, 277 qpdf_p_limit_errors = 0x10020,
278 278
279 /* global options */ 279 /* global options */
  280 + qpdf_p_inspection_mode = 0x11000,
280 qpdf_p_default_limits = 0x11100, 281 qpdf_p_default_limits = 0x11100,
281 /* global limits */ 282 /* global limits */
282 283
283 - /* object - parser limits */ 284 + /* parser limits */
284 qpdf_p_parser_max_nesting = 0x13000, 285 qpdf_p_parser_max_nesting = 0x13000,
285 qpdf_p_parser_max_errors, 286 qpdf_p_parser_max_errors,
286 qpdf_p_parser_max_container_size, 287 qpdf_p_parser_max_container_size,
include/qpdf/global.hh
@@ -68,6 +68,30 @@ namespace qpdf::global @@ -68,6 +68,30 @@ namespace qpdf::global
68 68
69 namespace options 69 namespace options
70 { 70 {
  71 + /// @brief Retrieves whether inspection mode is set.
  72 + ///
  73 + /// @return True if inspection mode is set.
  74 + ///
  75 + /// @since 12.3
  76 + bool inline inspection_mode()
  77 + {
  78 + return get_uint32(qpdf_p_inspection_mode) != 0;
  79 + }
  80 +
  81 + /// @brief Set inspection mode if `true` is passed.
  82 + ///
  83 + /// This function enables restrictive inspection mode if `true` is passed. Inspection mode
  84 + /// must be enabled before a QPDF object is created. By default inspection mode is off.
  85 + /// Calling `inspection_mode(false)` is not supported and currently is a no-op.
  86 + ///
  87 + /// @param value A boolean indicating whether to enable (true) inspection mode.
  88 + ///
  89 + /// @since 12.3
  90 + void inline inspection_mode(bool value)
  91 + {
  92 + set_uint32(qpdf_p_inspection_mode, value ? QPDF_TRUE : QPDF_FALSE);
  93 + }
  94 +
71 /// @brief Retrieves whether default limits are enabled. 95 /// @brief Retrieves whether default limits are enabled.
72 /// 96 ///
73 /// @return True if default limits are enabled. 97 /// @return True if default limits are enabled.
libqpdf/QPDF_encryption.cc
@@ -758,7 +758,7 @@ QPDF::EncryptionParameters::initialize(QPDF& qpdf) @@ -758,7 +758,7 @@ QPDF::EncryptionParameters::initialize(QPDF& qpdf)
758 // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption 758 // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
759 // dictionary. 759 // dictionary.
760 760
761 - if (!trailer.hasKey("/Encrypt")) { 761 + if (!trailer.contains("/Encrypt")) {
762 return; 762 return;
763 } 763 }
764 764
libqpdf/QPDF_objects.cc
@@ -257,11 +257,21 @@ Objects::parse(char const* password) @@ -257,11 +257,21 @@ Objects::parse(char const* password)
257 throw damagedPDF("", -1, std::string("error reading xref: ") + e.what()); 257 throw damagedPDF("", -1, std::string("error reading xref: ") + e.what());
258 } 258 }
259 } catch (QPDFExc& e) { 259 } catch (QPDFExc& e) {
260 - if (!cf.surpress_recovery()) {  
261 - reconstruct_xref(e, xref_offset > 0);  
262 - } else { 260 + if (global::Options::inspection_mode()) {
  261 + try {
  262 + reconstruct_xref(e, xref_offset > 0);
  263 + } catch (std::exception& er) {
  264 + warn(damagedPDF("", -1, "error reconstructing xref: "s + er.what()));
  265 + }
  266 + if (!m->trailer) {
  267 + m->trailer = Dictionary::empty();
  268 + }
  269 + return;
  270 + }
  271 + if (cf.surpress_recovery()) {
263 throw; 272 throw;
264 } 273 }
  274 + reconstruct_xref(e, xref_offset > 0);
265 } 275 }
266 276
267 m->encp->initialize(qpdf); 277 m->encp->initialize(qpdf);
libqpdf/global.cc
@@ -35,6 +35,9 @@ qpdf_global_get_uint32(qpdf_param_e param, uint32_t* value) @@ -35,6 +35,9 @@ qpdf_global_get_uint32(qpdf_param_e param, uint32_t* value)
35 { 35 {
36 qpdf_expect(value); 36 qpdf_expect(value);
37 switch (param) { 37 switch (param) {
  38 + case qpdf_p_inspection_mode:
  39 + *value = Options::inspection_mode();
  40 + return qpdf_r_ok;
38 case qpdf_p_default_limits: 41 case qpdf_p_default_limits:
39 *value = Options::default_limits(); 42 *value = Options::default_limits();
40 return qpdf_r_ok; 43 return qpdf_r_ok;
@@ -62,6 +65,9 @@ qpdf_result_e @@ -62,6 +65,9 @@ qpdf_result_e
62 qpdf_global_set_uint32(qpdf_param_e param, uint32_t value) 65 qpdf_global_set_uint32(qpdf_param_e param, uint32_t value)
63 { 66 {
64 switch (param) { 67 switch (param) {
  68 + case qpdf_p_inspection_mode:
  69 + Options::inspection_mode(value);
  70 + return qpdf_r_ok;
65 case qpdf_p_default_limits: 71 case qpdf_p_default_limits:
66 Options::default_limits(value); 72 Options::default_limits(value);
67 return qpdf_r_ok; 73 return qpdf_r_ok;
libqpdf/qpdf/QPDF_private.hh
@@ -12,6 +12,9 @@ @@ -12,6 +12,9 @@
12 #include <qpdf/QPDFPageDocumentHelper.hh> 12 #include <qpdf/QPDFPageDocumentHelper.hh>
13 #include <qpdf/QPDFPageLabelDocumentHelper.hh> 13 #include <qpdf/QPDFPageLabelDocumentHelper.hh>
14 #include <qpdf/QPDFTokenizer_private.hh> 14 #include <qpdf/QPDFTokenizer_private.hh>
  15 +#include <qpdf/global_private.hh>
  16 +
  17 +#include <exception>
15 18
16 using namespace qpdf; 19 using namespace qpdf;
17 20
@@ -374,6 +377,7 @@ class QPDF::Doc @@ -374,6 +377,7 @@ class QPDF::Doc
374 acroform() 377 acroform()
375 { 378 {
376 if (!acroform_) { 379 if (!acroform_) {
  380 + no_inspection();
377 acroform_ = std::make_unique<QPDFAcroFormDocumentHelper>(qpdf); 381 acroform_ = std::make_unique<QPDFAcroFormDocumentHelper>(qpdf);
378 } 382 }
379 return *acroform_; 383 return *acroform_;
@@ -383,6 +387,7 @@ class QPDF::Doc @@ -383,6 +387,7 @@ class QPDF::Doc
383 embedded_files() 387 embedded_files()
384 { 388 {
385 if (!embedded_files_) { 389 if (!embedded_files_) {
  390 + no_inspection();
386 embedded_files_ = std::make_unique<QPDFEmbeddedFileDocumentHelper>(qpdf); 391 embedded_files_ = std::make_unique<QPDFEmbeddedFileDocumentHelper>(qpdf);
387 } 392 }
388 return *embedded_files_; 393 return *embedded_files_;
@@ -392,6 +397,7 @@ class QPDF::Doc @@ -392,6 +397,7 @@ class QPDF::Doc
392 outlines() 397 outlines()
393 { 398 {
394 if (!outlines_) { 399 if (!outlines_) {
  400 + no_inspection();
395 outlines_ = std::make_unique<QPDFOutlineDocumentHelper>(qpdf); 401 outlines_ = std::make_unique<QPDFOutlineDocumentHelper>(qpdf);
396 } 402 }
397 return *outlines_; 403 return *outlines_;
@@ -401,6 +407,7 @@ class QPDF::Doc @@ -401,6 +407,7 @@ class QPDF::Doc
401 page_dh() 407 page_dh()
402 { 408 {
403 if (!page_dh_) { 409 if (!page_dh_) {
  410 + no_inspection();
404 page_dh_ = std::make_unique<QPDFPageDocumentHelper>(qpdf); 411 page_dh_ = std::make_unique<QPDFPageDocumentHelper>(qpdf);
405 } 412 }
406 return *page_dh_; 413 return *page_dh_;
@@ -410,12 +417,21 @@ class QPDF::Doc @@ -410,12 +417,21 @@ class QPDF::Doc
410 page_labels() 417 page_labels()
411 { 418 {
412 if (!page_labels_) { 419 if (!page_labels_) {
  420 + no_inspection();
413 page_labels_ = std::make_unique<QPDFPageLabelDocumentHelper>(qpdf); 421 page_labels_ = std::make_unique<QPDFPageLabelDocumentHelper>(qpdf);
414 } 422 }
415 return *page_labels_; 423 return *page_labels_;
416 } 424 }
417 425
418 protected: 426 protected:
  427 + void
  428 + no_inspection()
  429 + {
  430 + if (global::Options::inspection_mode()) {
  431 + throw std::logic_error("Attempted unsupported operation in inspection mode");
  432 + }
  433 + }
  434 +
419 QPDF& qpdf; 435 QPDF& qpdf;
420 QPDF::Members* m; 436 QPDF::Members* m;
421 437
libqpdf/qpdf/global_private.hh
@@ -85,6 +85,20 @@ namespace qpdf::global @@ -85,6 +85,20 @@ namespace qpdf::global
85 { 85 {
86 public: 86 public:
87 static bool 87 static bool
  88 + inspection_mode()
  89 + {
  90 + return static_cast<bool>(o.inspection_mode_);
  91 + }
  92 +
  93 + static void
  94 + inspection_mode(bool value)
  95 + {
  96 + if (value) {
  97 + o.inspection_mode_ = true;
  98 + }
  99 + }
  100 +
  101 + static bool
88 default_limits() 102 default_limits()
89 { 103 {
90 return static_cast<bool>(o.default_limits_); 104 return static_cast<bool>(o.default_limits_);
@@ -102,6 +116,7 @@ namespace qpdf::global @@ -102,6 +116,7 @@ namespace qpdf::global
102 private: 116 private:
103 static Options o; 117 static Options o;
104 118
  119 + bool inspection_mode_{false};
105 bool default_limits_{true}; 120 bool default_limits_{true};
106 }; 121 };
107 } // namespace qpdf::global 122 } // namespace qpdf::global
qpdf/qtest/inspection-mode.test 0 → 100644
  1 +#!/usr/bin/env perl
  2 +require 5.008;
  3 +use warnings;
  4 +use strict;
  5 +
  6 +unshift(@INC, '.');
  7 +require qpdf_test_helpers;
  8 +
  9 +chdir("qpdf") or die "chdir testdir failed: $!\n";
  10 +
  11 +require TestDriver;
  12 +
  13 +cleanup();
  14 +
  15 +my $td = new TestDriver('inspection-mode');
  16 +
  17 +my $n_tests = 1;
  18 +
  19 +$td->runtest("inspection mode",
  20 + {$td->COMMAND => "test_driver 101 - -"},
  21 + {$td->FILE => "inspection-mode.out", $td->EXIT_STATUS => 0},
  22 + $td->NORMALIZE_NEWLINES);
  23 +
  24 +cleanup();
  25 +$td->report($n_tests);
qpdf/qtest/qpdf/inspect.pdf 0 → 100644
No preview for this file type
qpdf/qtest/qpdf/inspection-mode.out 0 → 100644
  1 +WARNING: inspect.pdf: can't find PDF header
  2 +WARNING: inspect.pdf: file is damaged
  3 +WARNING: inspect.pdf: can't find startxref
  4 +WARNING: inspect.pdf: Attempting to reconstruct cross-reference table
  5 +WARNING: inspect.pdf (trailer, offset 38): unknown token while reading object; treating as null
  6 +WARNING: inspect.pdf (trailer, offset 60): unknown token while reading object; treating as null
  7 +WARNING: inspect.pdf (trailer, offset 82): treating bad indirect reference (0 0 R) as null
  8 +WARNING: inspect.pdf (trailer, offset 90): unknown token while reading object; treating as null
  9 +WARNING: inspect.pdf (trailer, offset 100): unexpected >
  10 +WARNING: inspect.pdf (trailer, offset 71): expected dictionary keys but found non-name objects; ignoring
  11 +WARNING: inspect.pdf (trailer, offset 202): unexpected 'endobj' or 'endstream' while reading object; giving up on reading object
  12 +WARNING: inspect.pdf: error reconstructing xref: inspect.pdf: unable to find trailer dictionary while recovering damaged file
  13 +5 0
  14 +null
  15 +20 0
  16 +<< /Fields [ 21 0 R ] >>
  17 +21 0
  18 +<< /Kids [ 22 0 R 23 0 R 24 0 R 25 0 R ] /Rect [ 100 100 500 500 ] /Subtype /Widget /T (MyFie\224d) /Type /Annot >>
  19 +22 0
  20 +null
  21 +23 0
  22 +<< /FT /Tx /Rect [ 401 401 421 421 ] /Subtype /Widget /T (Sub_RightTop) /Type /Annot >>
  23 +24 0
  24 +<< /FT /Tx /Rect [ 201 400 221 420 ] /Subtype /Widget /T (Sub_LeftTop) /Type /Annot >>
  25 +25 0
  26 +<< /FT /Tx /Rect [ 400 201 420 221 ] /Subtype /Widget /T (Sub_RightBottom) /Type /Annot >>
  27 +test 101 done
qpdf/test_driver.cc
@@ -3550,6 +3550,41 @@ test_100(QPDF& pdf, char const* arg2) @@ -3550,6 +3550,41 @@ test_100(QPDF& pdf, char const* arg2)
3550 } 3550 }
3551 } 3551 }
3552 3552
  3553 +static void
  3554 +test_101(QPDF& pdf, char const* arg2)
  3555 +{
  3556 + // Test inspection mode
  3557 + QPDF qpdf;
  3558 + assert(!qpdf::global::options::inspection_mode());
  3559 + qpdf::global::options::inspection_mode(true);
  3560 + assert(qpdf::global::options::inspection_mode());
  3561 + qpdf::global::options::inspection_mode(false);
  3562 + // Setting inspection mode is irreversible
  3563 + assert(qpdf::global::options::inspection_mode());
  3564 + qpdf.processFile("inspect.pdf");
  3565 + for (auto& oh: qpdf.getAllObjects()) {
  3566 + std::cout << oh.getObjGen().unparse(' ') << '\n';
  3567 + std::cout << oh.unparseResolved() << '\n';
  3568 + }
  3569 +
  3570 +
  3571 + auto test_helper_throws = [&qpdf](auto helper_func) {
  3572 + bool thrown = false;
  3573 + try {
  3574 + helper_func(qpdf);
  3575 + } catch (std::logic_error&) {
  3576 + thrown = true;
  3577 + }
  3578 + assert(thrown);
  3579 + };
  3580 +
  3581 + test_helper_throws([](QPDF& q) { (void)QPDFAcroFormDocumentHelper::get(q); });
  3582 + test_helper_throws([](QPDF& q) { (void)QPDFEmbeddedFileDocumentHelper::get(q); });
  3583 + test_helper_throws([](QPDF& q) { (void)QPDFOutlineDocumentHelper::get(q); });
  3584 + test_helper_throws([](QPDF& q) { (void)QPDFPageDocumentHelper::get(q); });
  3585 + test_helper_throws([](QPDF& q) { (void)QPDFPageLabelDocumentHelper::get(q); });
  3586 +}
  3587 +
3553 void 3588 void
3554 runtest(int n, char const* filename1, char const* arg2) 3589 runtest(int n, char const* filename1, char const* arg2)
3555 { 3590 {
@@ -3557,7 +3592,7 @@ runtest(int n, char const* filename1, char const* arg2) @@ -3557,7 +3592,7 @@ runtest(int n, char const* filename1, char const* arg2)
3557 // the test suite to see how the test is invoked to find the file 3592 // the test suite to see how the test is invoked to find the file
3558 // that the test is supposed to operate on. 3593 // that the test is supposed to operate on.
3559 3594
3560 - std::set<int> ignore_filename = {61, 62, 81, 83, 84, 85, 86, 87, 92, 95, 96}; 3595 + std::set<int> ignore_filename = {61, 62, 81, 83, 84, 85, 86, 87, 92, 95, 96, 101};
3561 3596
3562 if (n == 0) { 3597 if (n == 0) {
3563 // Throw in some random test cases that don't fit anywhere 3598 // Throw in some random test cases that don't fit anywhere
@@ -3631,23 +3666,27 @@ runtest(int n, char const* filename1, char const* arg2) @@ -3631,23 +3666,27 @@ runtest(int n, char const* filename1, char const* arg2)
3631 } 3666 }
3632 3667
3633 std::map<int, void (*)(QPDF&, char const*)> test_functions = { 3668 std::map<int, void (*)(QPDF&, char const*)> test_functions = {
3634 - {0, test_0_1}, {1, test_0_1}, {2, test_2}, {3, test_3}, {4, test_4}, {5, test_5},  
3635 - {6, test_6}, {7, test_7}, {8, test_8}, {9, test_9}, {10, test_10}, {11, test_11},  
3636 - {12, test_12}, {13, test_13}, {14, test_14}, {15, test_15}, {16, test_16}, {17, test_17},  
3637 - {18, test_18}, {19, test_19}, {20, test_20}, {21, test_21}, {22, test_22}, {23, test_23},  
3638 - {24, test_24}, {25, test_25}, {26, test_26}, {27, test_27}, {28, test_28}, {29, test_29},  
3639 - {30, test_30}, {31, test_31}, {32, test_32}, {33, test_33}, {34, test_34}, {35, test_35},  
3640 - {36, test_36}, {37, test_37}, {38, test_38}, {39, test_39}, {40, test_40}, {41, test_41},  
3641 - {42, test_42}, {43, test_43}, {44, test_44}, {45, test_45}, {46, test_46}, {47, test_47},  
3642 - {48, test_48}, {49, test_49}, {50, test_50}, {51, test_51}, {52, test_52}, {53, test_53},  
3643 - {54, test_54}, {55, test_55}, {56, test_56}, {57, test_57}, {58, test_58}, {59, test_59},  
3644 - {60, test_60}, {61, test_61}, {62, test_62}, {63, test_63}, {64, test_64}, {65, test_65},  
3645 - {66, test_66}, {67, test_67}, {68, test_68}, {69, test_69}, {70, test_70}, {71, test_71},  
3646 - {72, test_72}, {73, test_73}, {74, test_74}, {75, test_75}, {76, test_76}, {77, test_77},  
3647 - {78, test_78}, {79, test_79}, {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83},  
3648 - {84, test_84}, {85, test_85}, {86, test_86}, {87, test_87}, {88, test_88}, {89, test_89},  
3649 - {90, test_90}, {91, test_91}, {92, test_92}, {93, test_93}, {94, test_94}, {95, test_95},  
3650 - {96, test_96}, {97, test_97}, {98, test_98}, {99, test_99}, {100, test_100}}; 3669 + {0, test_0_1}, {1, test_0_1}, {2, test_2}, {3, test_3}, {4, test_4},
  3670 + {5, test_5}, {6, test_6}, {7, test_7}, {8, test_8}, {9, test_9},
  3671 + {10, test_10}, {11, test_11}, {12, test_12}, {13, test_13}, {14, test_14},
  3672 + {15, test_15}, {16, test_16}, {17, test_17}, {18, test_18}, {19, test_19},
  3673 + {20, test_20}, {21, test_21}, {22, test_22}, {23, test_23}, {24, test_24},
  3674 + {25, test_25}, {26, test_26}, {27, test_27}, {28, test_28}, {29, test_29},
  3675 + {30, test_30}, {31, test_31}, {32, test_32}, {33, test_33}, {34, test_34},
  3676 + {35, test_35}, {36, test_36}, {37, test_37}, {38, test_38}, {39, test_39},
  3677 + {40, test_40}, {41, test_41}, {42, test_42}, {43, test_43}, {44, test_44},
  3678 + {45, test_45}, {46, test_46}, {47, test_47}, {48, test_48}, {49, test_49},
  3679 + {50, test_50}, {51, test_51}, {52, test_52}, {53, test_53}, {54, test_54},
  3680 + {55, test_55}, {56, test_56}, {57, test_57}, {58, test_58}, {59, test_59},
  3681 + {60, test_60}, {61, test_61}, {62, test_62}, {63, test_63}, {64, test_64},
  3682 + {65, test_65}, {66, test_66}, {67, test_67}, {68, test_68}, {69, test_69},
  3683 + {70, test_70}, {71, test_71}, {72, test_72}, {73, test_73}, {74, test_74},
  3684 + {75, test_75}, {76, test_76}, {77, test_77}, {78, test_78}, {79, test_79},
  3685 + {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83}, {84, test_84},
  3686 + {85, test_85}, {86, test_86}, {87, test_87}, {88, test_88}, {89, test_89},
  3687 + {90, test_90}, {91, test_91}, {92, test_92}, {93, test_93}, {94, test_94},
  3688 + {95, test_95}, {96, test_96}, {97, test_97}, {98, test_98}, {99, test_99},
  3689 + {100, test_100}, {101, test_101}};
3651 3690
3652 auto fn = test_functions.find(n); 3691 auto fn = test_functions.find(n);
3653 if (fn == test_functions.end()) { 3692 if (fn == test_functions.end()) {