Commit 3340dbe9761ef35d580d77a73e17d204579624f1
1 parent
b2b2a175
Use a specific error code for type warnings and clarify docs
Showing
8 changed files
with
196 additions
and
19 deletions
README-maintainer
| @@ -132,7 +132,9 @@ RELEASE PREPARATION | @@ -132,7 +132,9 @@ RELEASE PREPARATION | ||
| 132 | 132 | ||
| 133 | * Check all open issues and pull requests in github and the | 133 | * Check all open issues and pull requests in github and the |
| 134 | sourceforge trackers. See ~/scripts/github-issues. Don't forget pull | 134 | sourceforge trackers. See ~/scripts/github-issues. Don't forget pull |
| 135 | - requests. | 135 | + requests. Note: If the location for reporting issues changes, do a |
| 136 | + careful check of documentation and code to make sure any comments | ||
| 137 | + that include the issue creation URL are updated. | ||
| 136 | 138 | ||
| 137 | * Check `TODO` file to make sure all planned items for the release are | 139 | * Check `TODO` file to make sure all planned items for the release are |
| 138 | done or retargeted. | 140 | done or retargeted. |
cSpell.json
| @@ -143,6 +143,7 @@ | @@ -143,6 +143,7 @@ | ||
| 143 | "hcryptprov", | 143 | "hcryptprov", |
| 144 | "hdict", | 144 | "hdict", |
| 145 | "hoffmann", | 145 | "hoffmann", |
| 146 | + "holger", | ||
| 146 | "hosoda", | 147 | "hosoda", |
| 147 | "htcondor", | 148 | "htcondor", |
| 148 | "htdocs", | 149 | "htdocs", |
| @@ -205,6 +206,7 @@ | @@ -205,6 +206,7 @@ | ||
| 205 | "linp", | 206 | "linp", |
| 206 | "listitem", | 207 | "listitem", |
| 207 | "ljpeg", | 208 | "ljpeg", |
| 209 | + "longjmp", | ||
| 208 | "lpstr", | 210 | "lpstr", |
| 209 | "lqpdf", | 211 | "lqpdf", |
| 210 | "lssl", | 212 | "lssl", |
| @@ -367,6 +369,7 @@ | @@ -367,6 +369,7 @@ | ||
| 367 | "scarff", | 369 | "scarff", |
| 368 | "seekable", | 370 | "seekable", |
| 369 | "segfaulting", | 371 | "segfaulting", |
| 372 | + "setjmp", | ||
| 370 | "sharedresources", | 373 | "sharedresources", |
| 371 | "smatch", | 374 | "smatch", |
| 372 | "softlink", | 375 | "softlink", |
include/qpdf/Constants.h
| @@ -38,6 +38,7 @@ enum qpdf_error_code_e | @@ -38,6 +38,7 @@ enum qpdf_error_code_e | ||
| 38 | qpdf_e_password, /* incorrect password for encrypted file */ | 38 | qpdf_e_password, /* incorrect password for encrypted file */ |
| 39 | qpdf_e_damaged_pdf, /* syntax errors or other damage in PDF */ | 39 | qpdf_e_damaged_pdf, /* syntax errors or other damage in PDF */ |
| 40 | qpdf_e_pages, /* erroneous or unsupported pages structure */ | 40 | qpdf_e_pages, /* erroneous or unsupported pages structure */ |
| 41 | + qpdf_e_object, /* type/bounds errors accessing objects */ | ||
| 41 | }; | 42 | }; |
| 42 | 43 | ||
| 43 | /* Write Parameters. See QPDFWriter.hh for details. */ | 44 | /* Write Parameters. See QPDFWriter.hh for details. */ |
include/qpdf/QPDFObjectHandle.hh
| @@ -609,15 +609,65 @@ class QPDFObjectHandle | @@ -609,15 +609,65 @@ class QPDFObjectHandle | ||
| 609 | QPDF_DLL | 609 | QPDF_DLL |
| 610 | bool hasObjectDescription(); | 610 | bool hasObjectDescription(); |
| 611 | 611 | ||
| 612 | - // Accessor methods. If an accessor method that is valid for only | ||
| 613 | - // a particular object type is called on an object of the wrong | ||
| 614 | - // type, an exception is thrown. | 612 | + // Accessor methods |
| 613 | + // | ||
| 614 | + // (Note: this comment is referenced in qpdf-c.h and the manual.) | ||
| 615 | + // | ||
| 616 | + // In PDF files, objects have specific types, but there is nothing | ||
| 617 | + // that prevents PDF files from containing objects of types that | ||
| 618 | + // aren't expected by the specification. Many of the accessors | ||
| 619 | + // here expect objects of a particular type. Prior to qpdf 8, | ||
| 620 | + // calling an accessor on a method of the wrong type, such as | ||
| 621 | + // trying to get a dictionary key from an array, trying to get the | ||
| 622 | + // string value of a number, etc., would throw an exception, but | ||
| 623 | + // since qpdf 8, qpdf issues a warning and recovers using the | ||
| 624 | + // following behavior: | ||
| 625 | + // | ||
| 626 | + // * Requesting a value of the wrong type (int value from string, | ||
| 627 | + // array item from a scalar or dictionary, etc.) will return a | ||
| 628 | + // zero-like value for that type: false for boolean, 0 for | ||
| 629 | + // number, the empty string for string, or the null object for | ||
| 630 | + // an object handle. | ||
| 631 | + // | ||
| 632 | + // * Accessing an array item that is out of bounds will return a | ||
| 633 | + // null object. | ||
| 634 | + // | ||
| 635 | + // * Attempts to mutate an object of the wrong type (e.g., | ||
| 636 | + // attempting to add a dictionary key to a scalar or array) will | ||
| 637 | + // be ignored. | ||
| 638 | + // | ||
| 639 | + // When any of these fallback behaviors are used, qpdf issues a | ||
| 640 | + // warning. Starting in qpdf 10.5, these warnings have the error | ||
| 641 | + // code qpdf_e_object. Prior to 10.5, they had the error code | ||
| 642 | + // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with | ||
| 643 | + // a QPDF object (as is the case for all objects whose origin was | ||
| 644 | + // a PDF file), the warning is issued using the normal warning | ||
| 645 | + // mechanism (as described in QPDF.hh), making it possible to | ||
| 646 | + // suppress or otherwise detect them. If the QPDFObjectHandle is | ||
| 647 | + // not associated with a QPDF object (meaning it was created | ||
| 648 | + // programmatically), an exception will be thrown. | ||
| 649 | + // | ||
| 650 | + // The way to avoid getting any type warnings or exceptions, even | ||
| 651 | + // when working with malformed PDF files, is to always check the | ||
| 652 | + // type of a QPDFObjectHandle before accessing it (for example, | ||
| 653 | + // make sure that isString() returns true before calling | ||
| 654 | + // getStringValue()) and to always be sure that any array indices | ||
| 655 | + // are in bounds. | ||
| 656 | + // | ||
| 657 | + // For additional discussion and rationale for this behavior, see | ||
| 658 | + // the section in the QPDF manual entitled "Object Accessor | ||
| 659 | + // Methods". | ||
| 615 | 660 | ||
| 616 | // Methods for bool objects | 661 | // Methods for bool objects |
| 617 | QPDF_DLL | 662 | QPDF_DLL |
| 618 | bool getBoolValue(); | 663 | bool getBoolValue(); |
| 619 | 664 | ||
| 620 | - // Methods for integer objects | 665 | + // Methods for integer objects. Note: if an integer value is too |
| 666 | + // big (too far away from zero in either direction) to fit in the | ||
| 667 | + // requested return type, the maximum or minimum value for that | ||
| 668 | + // return type may be returned. For example, on a system with | ||
| 669 | + // 32-bit int, a numeric object with a value of 2^40 (or anything | ||
| 670 | + // too big for 32 bits) will be returned as INT_MAX. | ||
| 621 | QPDF_DLL | 671 | QPDF_DLL |
| 622 | long long getIntValue(); | 672 | long long getIntValue(); |
| 623 | QPDF_DLL | 673 | QPDF_DLL |
libqpdf/QPDFObjectHandle.cc
| @@ -3048,23 +3048,17 @@ void | @@ -3048,23 +3048,17 @@ void | ||
| 3048 | QPDFObjectHandle::typeWarning(char const* expected_type, | 3048 | QPDFObjectHandle::typeWarning(char const* expected_type, |
| 3049 | std::string const& warning) | 3049 | std::string const& warning) |
| 3050 | { | 3050 | { |
| 3051 | - QPDF* context = 0; | 3051 | + QPDF* context = nullptr; |
| 3052 | std::string description; | 3052 | std::string description; |
| 3053 | dereference(); | 3053 | dereference(); |
| 3054 | - if (this->obj->getDescription(context, description)) | ||
| 3055 | - { | ||
| 3056 | - warn(context, | ||
| 3057 | - QPDFExc( | ||
| 3058 | - qpdf_e_damaged_pdf, | 3054 | + this->obj->getDescription(context, description); |
| 3055 | + // Null context handled by warn | ||
| 3056 | + warn(context, | ||
| 3057 | + QPDFExc(qpdf_e_object, | ||
| 3059 | "", description, 0, | 3058 | "", description, 0, |
| 3060 | std::string("operation for ") + expected_type + | 3059 | std::string("operation for ") + expected_type + |
| 3061 | " attempted on object of type " + | 3060 | " attempted on object of type " + |
| 3062 | getTypeName() + ": " + warning)); | 3061 | getTypeName() + ": " + warning)); |
| 3063 | - } | ||
| 3064 | - else | ||
| 3065 | - { | ||
| 3066 | - assertType(expected_type, false); | ||
| 3067 | - } | ||
| 3068 | } | 3062 | } |
| 3069 | 3063 | ||
| 3070 | void | 3064 | void |
| @@ -3091,7 +3085,12 @@ QPDFObjectHandle::warnIfPossible(std::string const& warning, | @@ -3091,7 +3085,12 @@ QPDFObjectHandle::warnIfPossible(std::string const& warning, | ||
| 3091 | void | 3085 | void |
| 3092 | QPDFObjectHandle::objectWarning(std::string const& warning) | 3086 | QPDFObjectHandle::objectWarning(std::string const& warning) |
| 3093 | { | 3087 | { |
| 3094 | - warnIfPossible(warning, true); | 3088 | + QPDF* context = nullptr; |
| 3089 | + std::string description; | ||
| 3090 | + dereference(); | ||
| 3091 | + this->obj->getDescription(context, description); | ||
| 3092 | + // Null context handled by warn | ||
| 3093 | + warn(context, QPDFExc(qpdf_e_object, "", description, 0, warning)); | ||
| 3095 | } | 3094 | } |
| 3096 | 3095 | ||
| 3097 | void | 3096 | void |
manual/qpdf-manual.xml
| @@ -4560,6 +4560,96 @@ outfile.pdf</option> | @@ -4560,6 +4560,96 @@ outfile.pdf</option> | ||
| 4560 | filtered stream contents to a given pipeline. | 4560 | filtered stream contents to a given pipeline. |
| 4561 | </para> | 4561 | </para> |
| 4562 | </sect1> | 4562 | </sect1> |
| 4563 | + <sect1 id="ref.object-accessors"> | ||
| 4564 | + <!-- This section is referenced in QPDFObjectHandle.hh --> | ||
| 4565 | + <title>Object Accessor Methods</title> | ||
| 4566 | + <para> | ||
| 4567 | + For general information about how to access instances of | ||
| 4568 | + <classname>QPDFObjectHandle</classname>, please see the comments | ||
| 4569 | + in <filename>QPDFObjectHandle.hh</filename>. Search for | ||
| 4570 | + “Accessor methods”. This section provides a more | ||
| 4571 | + in-depth discussion of the behavior and the rationale for the | ||
| 4572 | + behavior. | ||
| 4573 | + </para> | ||
| 4574 | + <para> | ||
| 4575 | + <emphasis>Why were type errors made into warnings?</emphasis> When | ||
| 4576 | + type checks were introduced into qpdf in the early days, it was | ||
| 4577 | + expected that type errors would only occur as a result of | ||
| 4578 | + programmer error. However, in practice, type errors would occur | ||
| 4579 | + with malformed PDF files because of assumptions made in code, | ||
| 4580 | + including code within the qpdf library and code written by library | ||
| 4581 | + users. The most common case would be chaining calls to | ||
| 4582 | + <function>getKey()</function> to access keys deep within a | ||
| 4583 | + dictionary. In many cases, qpdf would be able to recover from | ||
| 4584 | + these situations, but the old behavior often resulted in crashes | ||
| 4585 | + rather than graceful recovery. For this reason, the errors were | ||
| 4586 | + changed to warnings. | ||
| 4587 | + </para> | ||
| 4588 | + <para> | ||
| 4589 | + <emphasis>Why even warn about type errors when the user can't | ||
| 4590 | + usually do anything about them?</emphasis> Type warnings are | ||
| 4591 | + extremely valuable during development. Since it's impossible to | ||
| 4592 | + catch at compile time things like typos in dictionary key names or | ||
| 4593 | + logic errors around what the structure of a PDF file might be, the | ||
| 4594 | + presence of type warnings can save lots of developer time. They | ||
| 4595 | + have also proven useful in exposing issues in qpdf itself that | ||
| 4596 | + would have otherwise gone undetected. | ||
| 4597 | + </para> | ||
| 4598 | + <para> | ||
| 4599 | + <emphasis>Can there be a type-safe | ||
| 4600 | + <classname>QPDFObjectHandle</classname>?</emphasis> It would be | ||
| 4601 | + great if <classname>QPDFObjectHandle</classname> could be more | ||
| 4602 | + strongly typed so that you'd have to have check that something was | ||
| 4603 | + of a particular type before calling type-specific accessor | ||
| 4604 | + methods. However, implementing this at this stage of the library's | ||
| 4605 | + history would be quite difficult, and it would make a the common | ||
| 4606 | + pattern of drilling into an object no longer work. While it would | ||
| 4607 | + be possible to have a parallel interface, it would create a lot of | ||
| 4608 | + extra code. If qpdf were written in a language like rust, an | ||
| 4609 | + interface like this would make a lot of sense, but, for a variety | ||
| 4610 | + of reasons, the qpdf API is consistent with other APIs of its | ||
| 4611 | + time, relying on exception handling to catch errors. The | ||
| 4612 | + underlying PDF objects are inherently not type-safe. Forcing | ||
| 4613 | + stronger type safety in <classname>QPDFObjectHandle</classname> | ||
| 4614 | + would ultimately cause a lot more code to have to be written and | ||
| 4615 | + would like make software that uses qpdf more brittle, and even so, | ||
| 4616 | + checks would have to occur at runtime. | ||
| 4617 | + </para> | ||
| 4618 | + <para> | ||
| 4619 | + <emphasis>Why do type errors sometimes raise | ||
| 4620 | + exceptions?</emphasis> The way warnings work in qpdf requires a | ||
| 4621 | + <classname>QPDF</classname> object to be associated with an object | ||
| 4622 | + handle for a warning to be issued. It would be nice if this could | ||
| 4623 | + be fixed, but it would require major changes to the API. Rather | ||
| 4624 | + than throwing away these conditions, we convert them to | ||
| 4625 | + exceptions. It's not that bad though. Since any object handle that | ||
| 4626 | + was read from a file has an associated <classname>QPDF</classname> | ||
| 4627 | + object, it would only be type errors on objects that were created | ||
| 4628 | + explicitly that would cause exceptions, and in that case, type | ||
| 4629 | + errors are much more likely to be the result of a coding error | ||
| 4630 | + than invalid input. | ||
| 4631 | + </para> | ||
| 4632 | + <para> | ||
| 4633 | + <emphasis>Why does the behavior of a type exception differ between | ||
| 4634 | + the C and C++ API?</emphasis> There is no way to throw and catch | ||
| 4635 | + exceptions in C short of something like | ||
| 4636 | + <function>setjmp</function> and <function>longjmp</function>, and | ||
| 4637 | + that approach is not portable across language barriers. Since the | ||
| 4638 | + C API is often used from other languages, it's important to keep | ||
| 4639 | + things as simple as possible. Starting in qpdf 10.5, exceptions | ||
| 4640 | + that used to crash code using the C API will be written to stderr | ||
| 4641 | + by default, and it is possible to register an error handler. | ||
| 4642 | + There's no reason that the error handler can't simulate exception | ||
| 4643 | + handling in some way, such as by using <function>setjmp</function> | ||
| 4644 | + and <function>longjmp</function> or by setting some variable that | ||
| 4645 | + can be checked after library calls are made. In retrospect, it | ||
| 4646 | + might have been better if the C API object handle methods returned | ||
| 4647 | + error codes like the other methods and set return values in | ||
| 4648 | + passed-in pointers, but this would complicate both the | ||
| 4649 | + implementation and the use of the library for a case that is | ||
| 4650 | + actually quite rare and largely avoidable. | ||
| 4651 | + </para> | ||
| 4652 | + </sect1> | ||
| 4563 | </chapter> | 4653 | </chapter> |
| 4564 | <chapter id="ref.linearization"> | 4654 | <chapter id="ref.linearization"> |
| 4565 | <title>Linearization</title> | 4655 | <title>Linearization</title> |
| @@ -5127,6 +5217,20 @@ print "\n"; | @@ -5127,6 +5217,20 @@ print "\n"; | ||
| 5127 | <itemizedlist> | 5217 | <itemizedlist> |
| 5128 | <listitem> | 5218 | <listitem> |
| 5129 | <para> | 5219 | <para> |
| 5220 | + Since qpdf version 8, using object accessor methods on an | ||
| 5221 | + instance of <classname>QPDFObjectHandle</classname> may | ||
| 5222 | + create warnings if the object is not of the expected type. | ||
| 5223 | + These warnings now have an error code of | ||
| 5224 | + <literal>qpdf_e_object</literal> instead of | ||
| 5225 | + <literal>qpdf_e_damaged_pdf</literal>. Also, comments have | ||
| 5226 | + been added to <filename>QPDFObjectHandle.hh</filename> to | ||
| 5227 | + explain in more detail what the behavior is. See <xref | ||
| 5228 | + linkend="ref.object-accessors"/> for a more in-depth | ||
| 5229 | + discussion. | ||
| 5230 | + </para> | ||
| 5231 | + </listitem> | ||
| 5232 | + <listitem> | ||
| 5233 | + <para> | ||
| 5130 | Add <function>qpdf_get_last_string_length</function> to the | 5234 | Add <function>qpdf_get_last_string_length</function> to the |
| 5131 | C API to get the length of the last string that was | 5235 | C API to get the length of the last string that was |
| 5132 | returned. This is needed to handle strings that contain | 5236 | returned. This is needed to handle strings that contain |
qpdf/qtest/qpdf.test
| @@ -273,12 +273,16 @@ $td->runtest("check final version", | @@ -273,12 +273,16 @@ $td->runtest("check final version", | ||
| 273 | show_ntests(); | 273 | show_ntests(); |
| 274 | # ---------- | 274 | # ---------- |
| 275 | $td->notify("--- Exceptions ---"); | 275 | $td->notify("--- Exceptions ---"); |
| 276 | -$n_tests += 1; | 276 | +$n_tests += 2; |
| 277 | 277 | ||
| 278 | $td->runtest("check exception handling", | 278 | $td->runtest("check exception handling", |
| 279 | {$td->COMMAND => "test_driver 61 -"}, | 279 | {$td->COMMAND => "test_driver 61 -"}, |
| 280 | {$td->FILE => "exceptions.out", $td->EXIT_STATUS => 0}, | 280 | {$td->FILE => "exceptions.out", $td->EXIT_STATUS => 0}, |
| 281 | $td->NORMALIZE_NEWLINES); | 281 | $td->NORMALIZE_NEWLINES); |
| 282 | +$td->runtest("check certain exception types", | ||
| 283 | + {$td->COMMAND => "test_driver 81 -"}, | ||
| 284 | + {$td->STRING => "test 81 done\n", $td->EXIT_STATUS => 0}, | ||
| 285 | + $td->NORMALIZE_NEWLINES); | ||
| 282 | 286 | ||
| 283 | show_ntests(); | 287 | show_ntests(); |
| 284 | # ---------- | 288 | # ---------- |
| @@ -5303,6 +5307,7 @@ for (my $large = 0; $large < $nlarge; ++$large) | @@ -5303,6 +5307,7 @@ for (my $large = 0; $large < $nlarge; ++$large) | ||
| 5303 | $td->NORMALIZE_NEWLINES); | 5307 | $td->NORMALIZE_NEWLINES); |
| 5304 | unlink $file; | 5308 | unlink $file; |
| 5305 | } | 5309 | } |
| 5310 | +show_ntests(); | ||
| 5306 | # ---------- | 5311 | # ---------- |
| 5307 | 5312 | ||
| 5308 | cleanup(); | 5313 | cleanup(); |
qpdf/test_driver.cc
| @@ -259,7 +259,7 @@ void runtest(int n, char const* filename1, char const* arg2) | @@ -259,7 +259,7 @@ void runtest(int n, char const* filename1, char const* arg2) | ||
| 259 | pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(), | 259 | pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(), |
| 260 | p, size); | 260 | p, size); |
| 261 | } | 261 | } |
| 262 | - else if (n == 61) | 262 | + else if ((n == 61) || (n == 81)) |
| 263 | { | 263 | { |
| 264 | // Ignore filename argument entirely | 264 | // Ignore filename argument entirely |
| 265 | } | 265 | } |
| @@ -3049,6 +3049,19 @@ void runtest(int n, char const* filename1, char const* arg2) | @@ -3049,6 +3049,19 @@ void runtest(int n, char const* filename1, char const* arg2) | ||
| 3049 | w2.setQDFMode(true); | 3049 | w2.setQDFMode(true); |
| 3050 | w2.write(); | 3050 | w2.write(); |
| 3051 | } | 3051 | } |
| 3052 | + else if (n == 81) | ||
| 3053 | + { | ||
| 3054 | + // Exercise that type errors get their own special type | ||
| 3055 | + try | ||
| 3056 | + { | ||
| 3057 | + QPDFObjectHandle::newNull().getIntValue(); | ||
| 3058 | + assert(false); | ||
| 3059 | + } | ||
| 3060 | + catch (QPDFExc& e) | ||
| 3061 | + { | ||
| 3062 | + assert(e.getErrorCode() == qpdf_e_object); | ||
| 3063 | + } | ||
| 3064 | + } | ||
| 3052 | else | 3065 | else |
| 3053 | { | 3066 | { |
| 3054 | throw std::runtime_error(std::string("invalid test ") + | 3067 | throw std::runtime_error(std::string("invalid test ") + |