Commit 3340dbe9761ef35d580d77a73e17d204579624f1
1 parent
b2b2a175
Use a specific error code for type warnings and clarify docs
Showing
8 changed files
with
196 additions
and
19 deletions
README-maintainer
| ... | ... | @@ -132,7 +132,9 @@ RELEASE PREPARATION |
| 132 | 132 | |
| 133 | 133 | * Check all open issues and pull requests in github and the |
| 134 | 134 | sourceforge trackers. See ~/scripts/github-issues. Don't forget pull |
| 135 | - requests. | |
| 135 | + requests. Note: If the location for reporting issues changes, do a | |
| 136 | + careful check of documentation and code to make sure any comments | |
| 137 | + that include the issue creation URL are updated. | |
| 136 | 138 | |
| 137 | 139 | * Check `TODO` file to make sure all planned items for the release are |
| 138 | 140 | done or retargeted. | ... | ... |
cSpell.json
| ... | ... | @@ -143,6 +143,7 @@ |
| 143 | 143 | "hcryptprov", |
| 144 | 144 | "hdict", |
| 145 | 145 | "hoffmann", |
| 146 | + "holger", | |
| 146 | 147 | "hosoda", |
| 147 | 148 | "htcondor", |
| 148 | 149 | "htdocs", |
| ... | ... | @@ -205,6 +206,7 @@ |
| 205 | 206 | "linp", |
| 206 | 207 | "listitem", |
| 207 | 208 | "ljpeg", |
| 209 | + "longjmp", | |
| 208 | 210 | "lpstr", |
| 209 | 211 | "lqpdf", |
| 210 | 212 | "lssl", |
| ... | ... | @@ -367,6 +369,7 @@ |
| 367 | 369 | "scarff", |
| 368 | 370 | "seekable", |
| 369 | 371 | "segfaulting", |
| 372 | + "setjmp", | |
| 370 | 373 | "sharedresources", |
| 371 | 374 | "smatch", |
| 372 | 375 | "softlink", | ... | ... |
include/qpdf/Constants.h
| ... | ... | @@ -38,6 +38,7 @@ enum qpdf_error_code_e |
| 38 | 38 | qpdf_e_password, /* incorrect password for encrypted file */ |
| 39 | 39 | qpdf_e_damaged_pdf, /* syntax errors or other damage in PDF */ |
| 40 | 40 | qpdf_e_pages, /* erroneous or unsupported pages structure */ |
| 41 | + qpdf_e_object, /* type/bounds errors accessing objects */ | |
| 41 | 42 | }; |
| 42 | 43 | |
| 43 | 44 | /* Write Parameters. See QPDFWriter.hh for details. */ | ... | ... |
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -609,15 +609,65 @@ class QPDFObjectHandle |
| 609 | 609 | QPDF_DLL |
| 610 | 610 | bool hasObjectDescription(); |
| 611 | 611 | |
| 612 | - // Accessor methods. If an accessor method that is valid for only | |
| 613 | - // a particular object type is called on an object of the wrong | |
| 614 | - // type, an exception is thrown. | |
| 612 | + // Accessor methods | |
| 613 | + // | |
| 614 | + // (Note: this comment is referenced in qpdf-c.h and the manual.) | |
| 615 | + // | |
| 616 | + // In PDF files, objects have specific types, but there is nothing | |
| 617 | + // that prevents PDF files from containing objects of types that | |
| 618 | + // aren't expected by the specification. Many of the accessors | |
| 619 | + // here expect objects of a particular type. Prior to qpdf 8, | |
| 620 | + // calling an accessor on a method of the wrong type, such as | |
| 621 | + // trying to get a dictionary key from an array, trying to get the | |
| 622 | + // string value of a number, etc., would throw an exception, but | |
| 623 | + // since qpdf 8, qpdf issues a warning and recovers using the | |
| 624 | + // following behavior: | |
| 625 | + // | |
| 626 | + // * Requesting a value of the wrong type (int value from string, | |
| 627 | + // array item from a scalar or dictionary, etc.) will return a | |
| 628 | + // zero-like value for that type: false for boolean, 0 for | |
| 629 | + // number, the empty string for string, or the null object for | |
| 630 | + // an object handle. | |
| 631 | + // | |
| 632 | + // * Accessing an array item that is out of bounds will return a | |
| 633 | + // null object. | |
| 634 | + // | |
| 635 | + // * Attempts to mutate an object of the wrong type (e.g., | |
| 636 | + // attempting to add a dictionary key to a scalar or array) will | |
| 637 | + // be ignored. | |
| 638 | + // | |
| 639 | + // When any of these fallback behaviors are used, qpdf issues a | |
| 640 | + // warning. Starting in qpdf 10.5, these warnings have the error | |
| 641 | + // code qpdf_e_object. Prior to 10.5, they had the error code | |
| 642 | + // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with | |
| 643 | + // a QPDF object (as is the case for all objects whose origin was | |
| 644 | + // a PDF file), the warning is issued using the normal warning | |
| 645 | + // mechanism (as described in QPDF.hh), making it possible to | |
| 646 | + // suppress or otherwise detect them. If the QPDFObjectHandle is | |
| 647 | + // not associated with a QPDF object (meaning it was created | |
| 648 | + // programmatically), an exception will be thrown. | |
| 649 | + // | |
| 650 | + // The way to avoid getting any type warnings or exceptions, even | |
| 651 | + // when working with malformed PDF files, is to always check the | |
| 652 | + // type of a QPDFObjectHandle before accessing it (for example, | |
| 653 | + // make sure that isString() returns true before calling | |
| 654 | + // getStringValue()) and to always be sure that any array indices | |
| 655 | + // are in bounds. | |
| 656 | + // | |
| 657 | + // For additional discussion and rationale for this behavior, see | |
| 658 | + // the section in the QPDF manual entitled "Object Accessor | |
| 659 | + // Methods". | |
| 615 | 660 | |
| 616 | 661 | // Methods for bool objects |
| 617 | 662 | QPDF_DLL |
| 618 | 663 | bool getBoolValue(); |
| 619 | 664 | |
| 620 | - // Methods for integer objects | |
| 665 | + // Methods for integer objects. Note: if an integer value is too | |
| 666 | + // big (too far away from zero in either direction) to fit in the | |
| 667 | + // requested return type, the maximum or minimum value for that | |
| 668 | + // return type may be returned. For example, on a system with | |
| 669 | + // 32-bit int, a numeric object with a value of 2^40 (or anything | |
| 670 | + // too big for 32 bits) will be returned as INT_MAX. | |
| 621 | 671 | QPDF_DLL |
| 622 | 672 | long long getIntValue(); |
| 623 | 673 | QPDF_DLL | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -3048,23 +3048,17 @@ void |
| 3048 | 3048 | QPDFObjectHandle::typeWarning(char const* expected_type, |
| 3049 | 3049 | std::string const& warning) |
| 3050 | 3050 | { |
| 3051 | - QPDF* context = 0; | |
| 3051 | + QPDF* context = nullptr; | |
| 3052 | 3052 | std::string description; |
| 3053 | 3053 | dereference(); |
| 3054 | - if (this->obj->getDescription(context, description)) | |
| 3055 | - { | |
| 3056 | - warn(context, | |
| 3057 | - QPDFExc( | |
| 3058 | - qpdf_e_damaged_pdf, | |
| 3054 | + this->obj->getDescription(context, description); | |
| 3055 | + // Null context handled by warn | |
| 3056 | + warn(context, | |
| 3057 | + QPDFExc(qpdf_e_object, | |
| 3059 | 3058 | "", description, 0, |
| 3060 | 3059 | std::string("operation for ") + expected_type + |
| 3061 | 3060 | " attempted on object of type " + |
| 3062 | 3061 | getTypeName() + ": " + warning)); |
| 3063 | - } | |
| 3064 | - else | |
| 3065 | - { | |
| 3066 | - assertType(expected_type, false); | |
| 3067 | - } | |
| 3068 | 3062 | } |
| 3069 | 3063 | |
| 3070 | 3064 | void |
| ... | ... | @@ -3091,7 +3085,12 @@ QPDFObjectHandle::warnIfPossible(std::string const& warning, |
| 3091 | 3085 | void |
| 3092 | 3086 | QPDFObjectHandle::objectWarning(std::string const& warning) |
| 3093 | 3087 | { |
| 3094 | - warnIfPossible(warning, true); | |
| 3088 | + QPDF* context = nullptr; | |
| 3089 | + std::string description; | |
| 3090 | + dereference(); | |
| 3091 | + this->obj->getDescription(context, description); | |
| 3092 | + // Null context handled by warn | |
| 3093 | + warn(context, QPDFExc(qpdf_e_object, "", description, 0, warning)); | |
| 3095 | 3094 | } |
| 3096 | 3095 | |
| 3097 | 3096 | void | ... | ... |
manual/qpdf-manual.xml
| ... | ... | @@ -4560,6 +4560,96 @@ outfile.pdf</option> |
| 4560 | 4560 | filtered stream contents to a given pipeline. |
| 4561 | 4561 | </para> |
| 4562 | 4562 | </sect1> |
| 4563 | + <sect1 id="ref.object-accessors"> | |
| 4564 | + <!-- This section is referenced in QPDFObjectHandle.hh --> | |
| 4565 | + <title>Object Accessor Methods</title> | |
| 4566 | + <para> | |
| 4567 | + For general information about how to access instances of | |
| 4568 | + <classname>QPDFObjectHandle</classname>, please see the comments | |
| 4569 | + in <filename>QPDFObjectHandle.hh</filename>. Search for | |
| 4570 | + “Accessor methods”. This section provides a more | |
| 4571 | + in-depth discussion of the behavior and the rationale for the | |
| 4572 | + behavior. | |
| 4573 | + </para> | |
| 4574 | + <para> | |
| 4575 | + <emphasis>Why were type errors made into warnings?</emphasis> When | |
| 4576 | + type checks were introduced into qpdf in the early days, it was | |
| 4577 | + expected that type errors would only occur as a result of | |
| 4578 | + programmer error. However, in practice, type errors would occur | |
| 4579 | + with malformed PDF files because of assumptions made in code, | |
| 4580 | + including code within the qpdf library and code written by library | |
| 4581 | + users. The most common case would be chaining calls to | |
| 4582 | + <function>getKey()</function> to access keys deep within a | |
| 4583 | + dictionary. In many cases, qpdf would be able to recover from | |
| 4584 | + these situations, but the old behavior often resulted in crashes | |
| 4585 | + rather than graceful recovery. For this reason, the errors were | |
| 4586 | + changed to warnings. | |
| 4587 | + </para> | |
| 4588 | + <para> | |
| 4589 | + <emphasis>Why even warn about type errors when the user can't | |
| 4590 | + usually do anything about them?</emphasis> Type warnings are | |
| 4591 | + extremely valuable during development. Since it's impossible to | |
| 4592 | + catch at compile time things like typos in dictionary key names or | |
| 4593 | + logic errors around what the structure of a PDF file might be, the | |
| 4594 | + presence of type warnings can save lots of developer time. They | |
| 4595 | + have also proven useful in exposing issues in qpdf itself that | |
| 4596 | + would have otherwise gone undetected. | |
| 4597 | + </para> | |
| 4598 | + <para> | |
| 4599 | + <emphasis>Can there be a type-safe | |
| 4600 | + <classname>QPDFObjectHandle</classname>?</emphasis> It would be | |
| 4601 | + great if <classname>QPDFObjectHandle</classname> could be more | |
| 4602 | + strongly typed so that you'd have to have check that something was | |
| 4603 | + of a particular type before calling type-specific accessor | |
| 4604 | + methods. However, implementing this at this stage of the library's | |
| 4605 | + history would be quite difficult, and it would make a the common | |
| 4606 | + pattern of drilling into an object no longer work. While it would | |
| 4607 | + be possible to have a parallel interface, it would create a lot of | |
| 4608 | + extra code. If qpdf were written in a language like rust, an | |
| 4609 | + interface like this would make a lot of sense, but, for a variety | |
| 4610 | + of reasons, the qpdf API is consistent with other APIs of its | |
| 4611 | + time, relying on exception handling to catch errors. The | |
| 4612 | + underlying PDF objects are inherently not type-safe. Forcing | |
| 4613 | + stronger type safety in <classname>QPDFObjectHandle</classname> | |
| 4614 | + would ultimately cause a lot more code to have to be written and | |
| 4615 | + would like make software that uses qpdf more brittle, and even so, | |
| 4616 | + checks would have to occur at runtime. | |
| 4617 | + </para> | |
| 4618 | + <para> | |
| 4619 | + <emphasis>Why do type errors sometimes raise | |
| 4620 | + exceptions?</emphasis> The way warnings work in qpdf requires a | |
| 4621 | + <classname>QPDF</classname> object to be associated with an object | |
| 4622 | + handle for a warning to be issued. It would be nice if this could | |
| 4623 | + be fixed, but it would require major changes to the API. Rather | |
| 4624 | + than throwing away these conditions, we convert them to | |
| 4625 | + exceptions. It's not that bad though. Since any object handle that | |
| 4626 | + was read from a file has an associated <classname>QPDF</classname> | |
| 4627 | + object, it would only be type errors on objects that were created | |
| 4628 | + explicitly that would cause exceptions, and in that case, type | |
| 4629 | + errors are much more likely to be the result of a coding error | |
| 4630 | + than invalid input. | |
| 4631 | + </para> | |
| 4632 | + <para> | |
| 4633 | + <emphasis>Why does the behavior of a type exception differ between | |
| 4634 | + the C and C++ API?</emphasis> There is no way to throw and catch | |
| 4635 | + exceptions in C short of something like | |
| 4636 | + <function>setjmp</function> and <function>longjmp</function>, and | |
| 4637 | + that approach is not portable across language barriers. Since the | |
| 4638 | + C API is often used from other languages, it's important to keep | |
| 4639 | + things as simple as possible. Starting in qpdf 10.5, exceptions | |
| 4640 | + that used to crash code using the C API will be written to stderr | |
| 4641 | + by default, and it is possible to register an error handler. | |
| 4642 | + There's no reason that the error handler can't simulate exception | |
| 4643 | + handling in some way, such as by using <function>setjmp</function> | |
| 4644 | + and <function>longjmp</function> or by setting some variable that | |
| 4645 | + can be checked after library calls are made. In retrospect, it | |
| 4646 | + might have been better if the C API object handle methods returned | |
| 4647 | + error codes like the other methods and set return values in | |
| 4648 | + passed-in pointers, but this would complicate both the | |
| 4649 | + implementation and the use of the library for a case that is | |
| 4650 | + actually quite rare and largely avoidable. | |
| 4651 | + </para> | |
| 4652 | + </sect1> | |
| 4563 | 4653 | </chapter> |
| 4564 | 4654 | <chapter id="ref.linearization"> |
| 4565 | 4655 | <title>Linearization</title> |
| ... | ... | @@ -5127,6 +5217,20 @@ print "\n"; |
| 5127 | 5217 | <itemizedlist> |
| 5128 | 5218 | <listitem> |
| 5129 | 5219 | <para> |
| 5220 | + Since qpdf version 8, using object accessor methods on an | |
| 5221 | + instance of <classname>QPDFObjectHandle</classname> may | |
| 5222 | + create warnings if the object is not of the expected type. | |
| 5223 | + These warnings now have an error code of | |
| 5224 | + <literal>qpdf_e_object</literal> instead of | |
| 5225 | + <literal>qpdf_e_damaged_pdf</literal>. Also, comments have | |
| 5226 | + been added to <filename>QPDFObjectHandle.hh</filename> to | |
| 5227 | + explain in more detail what the behavior is. See <xref | |
| 5228 | + linkend="ref.object-accessors"/> for a more in-depth | |
| 5229 | + discussion. | |
| 5230 | + </para> | |
| 5231 | + </listitem> | |
| 5232 | + <listitem> | |
| 5233 | + <para> | |
| 5130 | 5234 | Add <function>qpdf_get_last_string_length</function> to the |
| 5131 | 5235 | C API to get the length of the last string that was |
| 5132 | 5236 | returned. This is needed to handle strings that contain | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -273,12 +273,16 @@ $td->runtest("check final version", |
| 273 | 273 | show_ntests(); |
| 274 | 274 | # ---------- |
| 275 | 275 | $td->notify("--- Exceptions ---"); |
| 276 | -$n_tests += 1; | |
| 276 | +$n_tests += 2; | |
| 277 | 277 | |
| 278 | 278 | $td->runtest("check exception handling", |
| 279 | 279 | {$td->COMMAND => "test_driver 61 -"}, |
| 280 | 280 | {$td->FILE => "exceptions.out", $td->EXIT_STATUS => 0}, |
| 281 | 281 | $td->NORMALIZE_NEWLINES); |
| 282 | +$td->runtest("check certain exception types", | |
| 283 | + {$td->COMMAND => "test_driver 81 -"}, | |
| 284 | + {$td->STRING => "test 81 done\n", $td->EXIT_STATUS => 0}, | |
| 285 | + $td->NORMALIZE_NEWLINES); | |
| 282 | 286 | |
| 283 | 287 | show_ntests(); |
| 284 | 288 | # ---------- |
| ... | ... | @@ -5303,6 +5307,7 @@ for (my $large = 0; $large < $nlarge; ++$large) |
| 5303 | 5307 | $td->NORMALIZE_NEWLINES); |
| 5304 | 5308 | unlink $file; |
| 5305 | 5309 | } |
| 5310 | +show_ntests(); | |
| 5306 | 5311 | # ---------- |
| 5307 | 5312 | |
| 5308 | 5313 | cleanup(); | ... | ... |
qpdf/test_driver.cc
| ... | ... | @@ -259,7 +259,7 @@ void runtest(int n, char const* filename1, char const* arg2) |
| 259 | 259 | pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(), |
| 260 | 260 | p, size); |
| 261 | 261 | } |
| 262 | - else if (n == 61) | |
| 262 | + else if ((n == 61) || (n == 81)) | |
| 263 | 263 | { |
| 264 | 264 | // Ignore filename argument entirely |
| 265 | 265 | } |
| ... | ... | @@ -3049,6 +3049,19 @@ void runtest(int n, char const* filename1, char const* arg2) |
| 3049 | 3049 | w2.setQDFMode(true); |
| 3050 | 3050 | w2.write(); |
| 3051 | 3051 | } |
| 3052 | + else if (n == 81) | |
| 3053 | + { | |
| 3054 | + // Exercise that type errors get their own special type | |
| 3055 | + try | |
| 3056 | + { | |
| 3057 | + QPDFObjectHandle::newNull().getIntValue(); | |
| 3058 | + assert(false); | |
| 3059 | + } | |
| 3060 | + catch (QPDFExc& e) | |
| 3061 | + { | |
| 3062 | + assert(e.getErrorCode() == qpdf_e_object); | |
| 3063 | + } | |
| 3064 | + } | |
| 3052 | 3065 | else |
| 3053 | 3066 | { |
| 3054 | 3067 | throw std::runtime_error(std::string("invalid test ") + | ... | ... |