Commit 675777bb2721eafe4c2cf0435166a8e4221de1bc
1 parent
66efa438
Add `inspection_mode` for optional restricted PDF inspection
Introduce `qpdf::global::inspection_mode` for analyzing and repairing damaged PDFs with a restricted feature set. Update relevant code to enforce restrictions and add supporting test cases.
Showing
11 changed files
with
186 additions
and
23 deletions
include/qpdf/Constants.h
| ... | ... | @@ -277,10 +277,11 @@ enum qpdf_param_e { |
| 277 | 277 | qpdf_p_limit_errors = 0x10020, |
| 278 | 278 | |
| 279 | 279 | /* global options */ |
| 280 | + qpdf_p_inspection_mode = 0x11000, | |
| 280 | 281 | qpdf_p_default_limits = 0x11100, |
| 281 | 282 | /* global limits */ |
| 282 | 283 | |
| 283 | - /* object - parser limits */ | |
| 284 | + /* parser limits */ | |
| 284 | 285 | qpdf_p_parser_max_nesting = 0x13000, |
| 285 | 286 | qpdf_p_parser_max_errors, |
| 286 | 287 | qpdf_p_parser_max_container_size, | ... | ... |
include/qpdf/global.hh
| ... | ... | @@ -68,6 +68,30 @@ namespace qpdf::global |
| 68 | 68 | |
| 69 | 69 | namespace options |
| 70 | 70 | { |
| 71 | + /// @brief Retrieves whether inspection mode is set. | |
| 72 | + /// | |
| 73 | + /// @return True if inspection mode is set. | |
| 74 | + /// | |
| 75 | + /// @since 12.3 | |
| 76 | + bool inline inspection_mode() | |
| 77 | + { | |
| 78 | + return get_uint32(qpdf_p_inspection_mode) != 0; | |
| 79 | + } | |
| 80 | + | |
| 81 | + /// @brief Set inspection mode if `true` is passed. | |
| 82 | + /// | |
| 83 | + /// This function enables restrictive inspection mode if `true` is passed. Inspection mode | |
| 84 | + /// must be enabled before a QPDF object is created. By default inspection mode is off. | |
| 85 | + /// Calling `inspection_mode(false)` is not supported and currently is a no-op. | |
| 86 | + /// | |
| 87 | + /// @param value A boolean indicating whether to enable (true) inspection mode. | |
| 88 | + /// | |
| 89 | + /// @since 12.3 | |
| 90 | + void inline inspection_mode(bool value) | |
| 91 | + { | |
| 92 | + set_uint32(qpdf_p_inspection_mode, value ? QPDF_TRUE : QPDF_FALSE); | |
| 93 | + } | |
| 94 | + | |
| 71 | 95 | /// @brief Retrieves whether default limits are enabled. |
| 72 | 96 | /// |
| 73 | 97 | /// @return True if default limits are enabled. | ... | ... |
libqpdf/QPDF_encryption.cc
| ... | ... | @@ -758,7 +758,7 @@ QPDF::EncryptionParameters::initialize(QPDF& qpdf) |
| 758 | 758 | // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption |
| 759 | 759 | // dictionary. |
| 760 | 760 | |
| 761 | - if (!trailer.hasKey("/Encrypt")) { | |
| 761 | + if (!trailer.contains("/Encrypt")) { | |
| 762 | 762 | return; |
| 763 | 763 | } |
| 764 | 764 | ... | ... |
libqpdf/QPDF_objects.cc
| ... | ... | @@ -257,11 +257,21 @@ Objects::parse(char const* password) |
| 257 | 257 | throw damagedPDF("", -1, std::string("error reading xref: ") + e.what()); |
| 258 | 258 | } |
| 259 | 259 | } catch (QPDFExc& e) { |
| 260 | - if (!cf.surpress_recovery()) { | |
| 261 | - reconstruct_xref(e, xref_offset > 0); | |
| 262 | - } else { | |
| 260 | + if (global::Options::inspection_mode()) { | |
| 261 | + try { | |
| 262 | + reconstruct_xref(e, xref_offset > 0); | |
| 263 | + } catch (std::exception& er) { | |
| 264 | + warn(damagedPDF("", -1, "error reconstructing xref: "s + er.what())); | |
| 265 | + } | |
| 266 | + if (!m->trailer) { | |
| 267 | + m->trailer = Dictionary::empty(); | |
| 268 | + } | |
| 269 | + return; | |
| 270 | + } | |
| 271 | + if (cf.surpress_recovery()) { | |
| 263 | 272 | throw; |
| 264 | 273 | } |
| 274 | + reconstruct_xref(e, xref_offset > 0); | |
| 265 | 275 | } |
| 266 | 276 | |
| 267 | 277 | m->encp->initialize(qpdf); | ... | ... |
libqpdf/global.cc
| ... | ... | @@ -35,6 +35,9 @@ qpdf_global_get_uint32(qpdf_param_e param, uint32_t* value) |
| 35 | 35 | { |
| 36 | 36 | qpdf_expect(value); |
| 37 | 37 | switch (param) { |
| 38 | + case qpdf_p_inspection_mode: | |
| 39 | + *value = Options::inspection_mode(); | |
| 40 | + return qpdf_r_ok; | |
| 38 | 41 | case qpdf_p_default_limits: |
| 39 | 42 | *value = Options::default_limits(); |
| 40 | 43 | return qpdf_r_ok; |
| ... | ... | @@ -62,6 +65,9 @@ qpdf_result_e |
| 62 | 65 | qpdf_global_set_uint32(qpdf_param_e param, uint32_t value) |
| 63 | 66 | { |
| 64 | 67 | switch (param) { |
| 68 | + case qpdf_p_inspection_mode: | |
| 69 | + Options::inspection_mode(value); | |
| 70 | + return qpdf_r_ok; | |
| 65 | 71 | case qpdf_p_default_limits: |
| 66 | 72 | Options::default_limits(value); |
| 67 | 73 | return qpdf_r_ok; | ... | ... |
libqpdf/qpdf/QPDF_private.hh
| ... | ... | @@ -12,6 +12,9 @@ |
| 12 | 12 | #include <qpdf/QPDFPageDocumentHelper.hh> |
| 13 | 13 | #include <qpdf/QPDFPageLabelDocumentHelper.hh> |
| 14 | 14 | #include <qpdf/QPDFTokenizer_private.hh> |
| 15 | +#include <qpdf/global_private.hh> | |
| 16 | + | |
| 17 | +#include <exception> | |
| 15 | 18 | |
| 16 | 19 | using namespace qpdf; |
| 17 | 20 | |
| ... | ... | @@ -374,6 +377,7 @@ class QPDF::Doc |
| 374 | 377 | acroform() |
| 375 | 378 | { |
| 376 | 379 | if (!acroform_) { |
| 380 | + no_inspection(); | |
| 377 | 381 | acroform_ = std::make_unique<QPDFAcroFormDocumentHelper>(qpdf); |
| 378 | 382 | } |
| 379 | 383 | return *acroform_; |
| ... | ... | @@ -383,6 +387,7 @@ class QPDF::Doc |
| 383 | 387 | embedded_files() |
| 384 | 388 | { |
| 385 | 389 | if (!embedded_files_) { |
| 390 | + no_inspection(); | |
| 386 | 391 | embedded_files_ = std::make_unique<QPDFEmbeddedFileDocumentHelper>(qpdf); |
| 387 | 392 | } |
| 388 | 393 | return *embedded_files_; |
| ... | ... | @@ -392,6 +397,7 @@ class QPDF::Doc |
| 392 | 397 | outlines() |
| 393 | 398 | { |
| 394 | 399 | if (!outlines_) { |
| 400 | + no_inspection(); | |
| 395 | 401 | outlines_ = std::make_unique<QPDFOutlineDocumentHelper>(qpdf); |
| 396 | 402 | } |
| 397 | 403 | return *outlines_; |
| ... | ... | @@ -401,6 +407,7 @@ class QPDF::Doc |
| 401 | 407 | page_dh() |
| 402 | 408 | { |
| 403 | 409 | if (!page_dh_) { |
| 410 | + no_inspection(); | |
| 404 | 411 | page_dh_ = std::make_unique<QPDFPageDocumentHelper>(qpdf); |
| 405 | 412 | } |
| 406 | 413 | return *page_dh_; |
| ... | ... | @@ -410,12 +417,21 @@ class QPDF::Doc |
| 410 | 417 | page_labels() |
| 411 | 418 | { |
| 412 | 419 | if (!page_labels_) { |
| 420 | + no_inspection(); | |
| 413 | 421 | page_labels_ = std::make_unique<QPDFPageLabelDocumentHelper>(qpdf); |
| 414 | 422 | } |
| 415 | 423 | return *page_labels_; |
| 416 | 424 | } |
| 417 | 425 | |
| 418 | 426 | protected: |
| 427 | + void | |
| 428 | + no_inspection() | |
| 429 | + { | |
| 430 | + if (global::Options::inspection_mode()) { | |
| 431 | + throw std::logic_error("Attempted unsupported operation in inspection mode"); | |
| 432 | + } | |
| 433 | + } | |
| 434 | + | |
| 419 | 435 | QPDF& qpdf; |
| 420 | 436 | QPDF::Members* m; |
| 421 | 437 | ... | ... |
libqpdf/qpdf/global_private.hh
| ... | ... | @@ -85,6 +85,20 @@ namespace qpdf::global |
| 85 | 85 | { |
| 86 | 86 | public: |
| 87 | 87 | static bool |
| 88 | + inspection_mode() | |
| 89 | + { | |
| 90 | + return static_cast<bool>(o.inspection_mode_); | |
| 91 | + } | |
| 92 | + | |
| 93 | + static void | |
| 94 | + inspection_mode(bool value) | |
| 95 | + { | |
| 96 | + if (value) { | |
| 97 | + o.inspection_mode_ = true; | |
| 98 | + } | |
| 99 | + } | |
| 100 | + | |
| 101 | + static bool | |
| 88 | 102 | default_limits() |
| 89 | 103 | { |
| 90 | 104 | return static_cast<bool>(o.default_limits_); |
| ... | ... | @@ -102,6 +116,7 @@ namespace qpdf::global |
| 102 | 116 | private: |
| 103 | 117 | static Options o; |
| 104 | 118 | |
| 119 | + bool inspection_mode_{false}; | |
| 105 | 120 | bool default_limits_{true}; |
| 106 | 121 | }; |
| 107 | 122 | } // namespace qpdf::global | ... | ... |
qpdf/qtest/inspection-mode.test
0 → 100644
| 1 | +#!/usr/bin/env perl | |
| 2 | +require 5.008; | |
| 3 | +use warnings; | |
| 4 | +use strict; | |
| 5 | + | |
| 6 | +unshift(@INC, '.'); | |
| 7 | +require qpdf_test_helpers; | |
| 8 | + | |
| 9 | +chdir("qpdf") or die "chdir testdir failed: $!\n"; | |
| 10 | + | |
| 11 | +require TestDriver; | |
| 12 | + | |
| 13 | +cleanup(); | |
| 14 | + | |
| 15 | +my $td = new TestDriver('inspection-mode'); | |
| 16 | + | |
| 17 | +my $n_tests = 1; | |
| 18 | + | |
| 19 | +$td->runtest("inspection mode", | |
| 20 | + {$td->COMMAND => "test_driver 101 - -"}, | |
| 21 | + {$td->FILE => "inspection-mode.out", $td->EXIT_STATUS => 0}, | |
| 22 | + $td->NORMALIZE_NEWLINES); | |
| 23 | + | |
| 24 | +cleanup(); | |
| 25 | +$td->report($n_tests); | ... | ... |
qpdf/qtest/qpdf/inspect.pdf
0 → 100644
No preview for this file type
qpdf/qtest/qpdf/inspection-mode.out
0 → 100644
| 1 | +WARNING: inspect.pdf: can't find PDF header | |
| 2 | +WARNING: inspect.pdf: file is damaged | |
| 3 | +WARNING: inspect.pdf: can't find startxref | |
| 4 | +WARNING: inspect.pdf: Attempting to reconstruct cross-reference table | |
| 5 | +WARNING: inspect.pdf (trailer, offset 38): unknown token while reading object; treating as null | |
| 6 | +WARNING: inspect.pdf (trailer, offset 60): unknown token while reading object; treating as null | |
| 7 | +WARNING: inspect.pdf (trailer, offset 82): treating bad indirect reference (0 0 R) as null | |
| 8 | +WARNING: inspect.pdf (trailer, offset 90): unknown token while reading object; treating as null | |
| 9 | +WARNING: inspect.pdf (trailer, offset 100): unexpected > | |
| 10 | +WARNING: inspect.pdf (trailer, offset 71): expected dictionary keys but found non-name objects; ignoring | |
| 11 | +WARNING: inspect.pdf (trailer, offset 202): unexpected 'endobj' or 'endstream' while reading object; giving up on reading object | |
| 12 | +WARNING: inspect.pdf: error reconstructing xref: inspect.pdf: unable to find trailer dictionary while recovering damaged file | |
| 13 | +5 0 | |
| 14 | +null | |
| 15 | +20 0 | |
| 16 | +<< /Fields [ 21 0 R ] >> | |
| 17 | +21 0 | |
| 18 | +<< /Kids [ 22 0 R 23 0 R 24 0 R 25 0 R ] /Rect [ 100 100 500 500 ] /Subtype /Widget /T (MyFie\224d) /Type /Annot >> | |
| 19 | +22 0 | |
| 20 | +null | |
| 21 | +23 0 | |
| 22 | +<< /FT /Tx /Rect [ 401 401 421 421 ] /Subtype /Widget /T (Sub_RightTop) /Type /Annot >> | |
| 23 | +24 0 | |
| 24 | +<< /FT /Tx /Rect [ 201 400 221 420 ] /Subtype /Widget /T (Sub_LeftTop) /Type /Annot >> | |
| 25 | +25 0 | |
| 26 | +<< /FT /Tx /Rect [ 400 201 420 221 ] /Subtype /Widget /T (Sub_RightBottom) /Type /Annot >> | |
| 27 | +test 101 done | ... | ... |
qpdf/test_driver.cc
| ... | ... | @@ -3550,6 +3550,41 @@ test_100(QPDF& pdf, char const* arg2) |
| 3550 | 3550 | } |
| 3551 | 3551 | } |
| 3552 | 3552 | |
| 3553 | +static void | |
| 3554 | +test_101(QPDF& pdf, char const* arg2) | |
| 3555 | +{ | |
| 3556 | + // Test inspection mode | |
| 3557 | + QPDF qpdf; | |
| 3558 | + assert(!qpdf::global::options::inspection_mode()); | |
| 3559 | + qpdf::global::options::inspection_mode(true); | |
| 3560 | + assert(qpdf::global::options::inspection_mode()); | |
| 3561 | + qpdf::global::options::inspection_mode(false); | |
| 3562 | + // Setting inspection mode is irreversible | |
| 3563 | + assert(qpdf::global::options::inspection_mode()); | |
| 3564 | + qpdf.processFile("inspect.pdf"); | |
| 3565 | + for (auto& oh: qpdf.getAllObjects()) { | |
| 3566 | + std::cout << oh.getObjGen().unparse(' ') << '\n'; | |
| 3567 | + std::cout << oh.unparseResolved() << '\n'; | |
| 3568 | + } | |
| 3569 | + | |
| 3570 | + | |
| 3571 | + auto test_helper_throws = [&qpdf](auto helper_func) { | |
| 3572 | + bool thrown = false; | |
| 3573 | + try { | |
| 3574 | + helper_func(qpdf); | |
| 3575 | + } catch (std::logic_error&) { | |
| 3576 | + thrown = true; | |
| 3577 | + } | |
| 3578 | + assert(thrown); | |
| 3579 | + }; | |
| 3580 | + | |
| 3581 | + test_helper_throws([](QPDF& q) { (void)QPDFAcroFormDocumentHelper::get(q); }); | |
| 3582 | + test_helper_throws([](QPDF& q) { (void)QPDFEmbeddedFileDocumentHelper::get(q); }); | |
| 3583 | + test_helper_throws([](QPDF& q) { (void)QPDFOutlineDocumentHelper::get(q); }); | |
| 3584 | + test_helper_throws([](QPDF& q) { (void)QPDFPageDocumentHelper::get(q); }); | |
| 3585 | + test_helper_throws([](QPDF& q) { (void)QPDFPageLabelDocumentHelper::get(q); }); | |
| 3586 | +} | |
| 3587 | + | |
| 3553 | 3588 | void |
| 3554 | 3589 | runtest(int n, char const* filename1, char const* arg2) |
| 3555 | 3590 | { |
| ... | ... | @@ -3557,7 +3592,7 @@ runtest(int n, char const* filename1, char const* arg2) |
| 3557 | 3592 | // the test suite to see how the test is invoked to find the file |
| 3558 | 3593 | // that the test is supposed to operate on. |
| 3559 | 3594 | |
| 3560 | - std::set<int> ignore_filename = {61, 62, 81, 83, 84, 85, 86, 87, 92, 95, 96}; | |
| 3595 | + std::set<int> ignore_filename = {61, 62, 81, 83, 84, 85, 86, 87, 92, 95, 96, 101}; | |
| 3561 | 3596 | |
| 3562 | 3597 | if (n == 0) { |
| 3563 | 3598 | // Throw in some random test cases that don't fit anywhere |
| ... | ... | @@ -3631,23 +3666,27 @@ runtest(int n, char const* filename1, char const* arg2) |
| 3631 | 3666 | } |
| 3632 | 3667 | |
| 3633 | 3668 | std::map<int, void (*)(QPDF&, char const*)> test_functions = { |
| 3634 | - {0, test_0_1}, {1, test_0_1}, {2, test_2}, {3, test_3}, {4, test_4}, {5, test_5}, | |
| 3635 | - {6, test_6}, {7, test_7}, {8, test_8}, {9, test_9}, {10, test_10}, {11, test_11}, | |
| 3636 | - {12, test_12}, {13, test_13}, {14, test_14}, {15, test_15}, {16, test_16}, {17, test_17}, | |
| 3637 | - {18, test_18}, {19, test_19}, {20, test_20}, {21, test_21}, {22, test_22}, {23, test_23}, | |
| 3638 | - {24, test_24}, {25, test_25}, {26, test_26}, {27, test_27}, {28, test_28}, {29, test_29}, | |
| 3639 | - {30, test_30}, {31, test_31}, {32, test_32}, {33, test_33}, {34, test_34}, {35, test_35}, | |
| 3640 | - {36, test_36}, {37, test_37}, {38, test_38}, {39, test_39}, {40, test_40}, {41, test_41}, | |
| 3641 | - {42, test_42}, {43, test_43}, {44, test_44}, {45, test_45}, {46, test_46}, {47, test_47}, | |
| 3642 | - {48, test_48}, {49, test_49}, {50, test_50}, {51, test_51}, {52, test_52}, {53, test_53}, | |
| 3643 | - {54, test_54}, {55, test_55}, {56, test_56}, {57, test_57}, {58, test_58}, {59, test_59}, | |
| 3644 | - {60, test_60}, {61, test_61}, {62, test_62}, {63, test_63}, {64, test_64}, {65, test_65}, | |
| 3645 | - {66, test_66}, {67, test_67}, {68, test_68}, {69, test_69}, {70, test_70}, {71, test_71}, | |
| 3646 | - {72, test_72}, {73, test_73}, {74, test_74}, {75, test_75}, {76, test_76}, {77, test_77}, | |
| 3647 | - {78, test_78}, {79, test_79}, {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83}, | |
| 3648 | - {84, test_84}, {85, test_85}, {86, test_86}, {87, test_87}, {88, test_88}, {89, test_89}, | |
| 3649 | - {90, test_90}, {91, test_91}, {92, test_92}, {93, test_93}, {94, test_94}, {95, test_95}, | |
| 3650 | - {96, test_96}, {97, test_97}, {98, test_98}, {99, test_99}, {100, test_100}}; | |
| 3669 | + {0, test_0_1}, {1, test_0_1}, {2, test_2}, {3, test_3}, {4, test_4}, | |
| 3670 | + {5, test_5}, {6, test_6}, {7, test_7}, {8, test_8}, {9, test_9}, | |
| 3671 | + {10, test_10}, {11, test_11}, {12, test_12}, {13, test_13}, {14, test_14}, | |
| 3672 | + {15, test_15}, {16, test_16}, {17, test_17}, {18, test_18}, {19, test_19}, | |
| 3673 | + {20, test_20}, {21, test_21}, {22, test_22}, {23, test_23}, {24, test_24}, | |
| 3674 | + {25, test_25}, {26, test_26}, {27, test_27}, {28, test_28}, {29, test_29}, | |
| 3675 | + {30, test_30}, {31, test_31}, {32, test_32}, {33, test_33}, {34, test_34}, | |
| 3676 | + {35, test_35}, {36, test_36}, {37, test_37}, {38, test_38}, {39, test_39}, | |
| 3677 | + {40, test_40}, {41, test_41}, {42, test_42}, {43, test_43}, {44, test_44}, | |
| 3678 | + {45, test_45}, {46, test_46}, {47, test_47}, {48, test_48}, {49, test_49}, | |
| 3679 | + {50, test_50}, {51, test_51}, {52, test_52}, {53, test_53}, {54, test_54}, | |
| 3680 | + {55, test_55}, {56, test_56}, {57, test_57}, {58, test_58}, {59, test_59}, | |
| 3681 | + {60, test_60}, {61, test_61}, {62, test_62}, {63, test_63}, {64, test_64}, | |
| 3682 | + {65, test_65}, {66, test_66}, {67, test_67}, {68, test_68}, {69, test_69}, | |
| 3683 | + {70, test_70}, {71, test_71}, {72, test_72}, {73, test_73}, {74, test_74}, | |
| 3684 | + {75, test_75}, {76, test_76}, {77, test_77}, {78, test_78}, {79, test_79}, | |
| 3685 | + {80, test_80}, {81, test_81}, {82, test_82}, {83, test_83}, {84, test_84}, | |
| 3686 | + {85, test_85}, {86, test_86}, {87, test_87}, {88, test_88}, {89, test_89}, | |
| 3687 | + {90, test_90}, {91, test_91}, {92, test_92}, {93, test_93}, {94, test_94}, | |
| 3688 | + {95, test_95}, {96, test_96}, {97, test_97}, {98, test_98}, {99, test_99}, | |
| 3689 | + {100, test_100}, {101, test_101}}; | |
| 3651 | 3690 | |
| 3652 | 3691 | auto fn = test_functions.find(n); |
| 3653 | 3692 | if (fn == test_functions.end()) { | ... | ... |