Commit 3340dbe9761ef35d580d77a73e17d204579624f1

Authored by Jay Berkenbilt
1 parent b2b2a175

Use a specific error code for type warnings and clarify docs

README-maintainer
... ... @@ -132,7 +132,9 @@ RELEASE PREPARATION
132 132  
133 133 * Check all open issues and pull requests in github and the
134 134 sourceforge trackers. See ~/scripts/github-issues. Don't forget pull
135   - requests.
  135 + requests. Note: If the location for reporting issues changes, do a
  136 + careful check of documentation and code to make sure any comments
  137 + that include the issue creation URL are updated.
136 138  
137 139 * Check `TODO` file to make sure all planned items for the release are
138 140 done or retargeted.
... ...
cSpell.json
... ... @@ -143,6 +143,7 @@
143 143 "hcryptprov",
144 144 "hdict",
145 145 "hoffmann",
  146 + "holger",
146 147 "hosoda",
147 148 "htcondor",
148 149 "htdocs",
... ... @@ -205,6 +206,7 @@
205 206 "linp",
206 207 "listitem",
207 208 "ljpeg",
  209 + "longjmp",
208 210 "lpstr",
209 211 "lqpdf",
210 212 "lssl",
... ... @@ -367,6 +369,7 @@
367 369 "scarff",
368 370 "seekable",
369 371 "segfaulting",
  372 + "setjmp",
370 373 "sharedresources",
371 374 "smatch",
372 375 "softlink",
... ...
include/qpdf/Constants.h
... ... @@ -38,6 +38,7 @@ enum qpdf_error_code_e
38 38 qpdf_e_password, /* incorrect password for encrypted file */
39 39 qpdf_e_damaged_pdf, /* syntax errors or other damage in PDF */
40 40 qpdf_e_pages, /* erroneous or unsupported pages structure */
  41 + qpdf_e_object, /* type/bounds errors accessing objects */
41 42 };
42 43  
43 44 /* Write Parameters. See QPDFWriter.hh for details. */
... ...
include/qpdf/QPDFObjectHandle.hh
... ... @@ -609,15 +609,65 @@ class QPDFObjectHandle
609 609 QPDF_DLL
610 610 bool hasObjectDescription();
611 611  
612   - // Accessor methods. If an accessor method that is valid for only
613   - // a particular object type is called on an object of the wrong
614   - // type, an exception is thrown.
  612 + // Accessor methods
  613 + //
  614 + // (Note: this comment is referenced in qpdf-c.h and the manual.)
  615 + //
  616 + // In PDF files, objects have specific types, but there is nothing
  617 + // that prevents PDF files from containing objects of types that
  618 + // aren't expected by the specification. Many of the accessors
  619 + // here expect objects of a particular type. Prior to qpdf 8,
  620 + // calling an accessor on an object of the wrong type, such as
  621 + // trying to get a dictionary key from an array, trying to get the
  622 + // string value of a number, etc., would throw an exception, but
  623 + // since qpdf 8, qpdf issues a warning and recovers using the
  624 + // following behavior:
  625 + //
  626 + // * Requesting a value of the wrong type (int value from string,
  627 + // array item from a scalar or dictionary, etc.) will return a
  628 + // zero-like value for that type: false for boolean, 0 for
  629 + // number, the empty string for string, or the null object for
  630 + // an object handle.
  631 + //
  632 + // * Accessing an array item that is out of bounds will return a
  633 + // null object.
  634 + //
  635 + // * Attempts to mutate an object of the wrong type (e.g.,
  636 + // attempting to add a dictionary key to a scalar or array) will
  637 + // be ignored.
  638 + //
  639 + // When any of these fallback behaviors are used, qpdf issues a
  640 + // warning. Starting in qpdf 10.5, these warnings have the error
  641 + // code qpdf_e_object. Prior to 10.5, they had the error code
  642 + // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with
  643 + // a QPDF object (as is the case for all objects whose origin was
  644 + // a PDF file), the warning is issued using the normal warning
  645 + // mechanism (as described in QPDF.hh), making it possible to
  646 + // suppress or otherwise detect them. If the QPDFObjectHandle is
  647 + // not associated with a QPDF object (meaning it was created
  648 + // programmatically), an exception will be thrown.
  649 + //
  650 + // The way to avoid getting any type warnings or exceptions, even
  651 + // when working with malformed PDF files, is to always check the
  652 + // type of a QPDFObjectHandle before accessing it (for example,
  653 + // make sure that isString() returns true before calling
  654 + // getStringValue()) and to always be sure that any array indices
  655 + // are in bounds.
  656 + //
  657 + // For additional discussion and rationale for this behavior, see
  658 + // the section in the QPDF manual entitled "Object Accessor
  659 + // Methods".
615 660  
616 661 // Methods for bool objects
617 662 QPDF_DLL
618 663 bool getBoolValue();
619 664  
620   - // Methods for integer objects
  665 + // Methods for integer objects. Note: if an integer value is too
  666 + // big (too far away from zero in either direction) to fit in the
  667 + // requested return type, the maximum or minimum value for that
  668 + // return type may be returned. For example, on a system with
  669 + // 32-bit int, a numeric object with a value of 2^40 (or anything
  670 + // too big for 32 bits) will be returned as INT_MAX.
621 671 QPDF_DLL
622 672 long long getIntValue();
623 673 QPDF_DLL
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -3048,23 +3048,17 @@ void
3048 3048 QPDFObjectHandle::typeWarning(char const* expected_type,
3049 3049 std::string const& warning)
3050 3050 {
3051   - QPDF* context = 0;
  3051 + QPDF* context = nullptr;
3052 3052 std::string description;
3053 3053 dereference();
3054   - if (this->obj->getDescription(context, description))
3055   - {
3056   - warn(context,
3057   - QPDFExc(
3058   - qpdf_e_damaged_pdf,
  3054 + this->obj->getDescription(context, description);
  3055 + // Null context handled by warn
  3056 + warn(context,
  3057 + QPDFExc(qpdf_e_object,
3059 3058 "", description, 0,
3060 3059 std::string("operation for ") + expected_type +
3061 3060 " attempted on object of type " +
3062 3061 getTypeName() + ": " + warning));
3063   - }
3064   - else
3065   - {
3066   - assertType(expected_type, false);
3067   - }
3068 3062 }
3069 3063  
3070 3064 void
... ... @@ -3091,7 +3085,12 @@ QPDFObjectHandle::warnIfPossible(std::string const& warning,
3091 3085 void
3092 3086 QPDFObjectHandle::objectWarning(std::string const& warning)
3093 3087 {
3094   - warnIfPossible(warning, true);
  3088 + QPDF* context = nullptr;
  3089 + std::string description;
  3090 + dereference();
  3091 + this->obj->getDescription(context, description);
  3092 + // Null context handled by warn
  3093 + warn(context, QPDFExc(qpdf_e_object, "", description, 0, warning));
3095 3094 }
3096 3095  
3097 3096 void
... ...
manual/qpdf-manual.xml
... ... @@ -4560,6 +4560,96 @@ outfile.pdf</option>
4560 4560 filtered stream contents to a given pipeline.
4561 4561 </para>
4562 4562 </sect1>
  4563 + <sect1 id="ref.object-accessors">
  4564 + <!-- This section is referenced in QPDFObjectHandle.hh -->
  4565 + <title>Object Accessor Methods</title>
  4566 + <para>
  4567 + For general information about how to access instances of
  4568 + <classname>QPDFObjectHandle</classname>, please see the comments
  4569 + in <filename>QPDFObjectHandle.hh</filename>. Search for
  4570 + &ldquo;Accessor methods&rdquo;. This section provides a more
  4571 + in-depth discussion of the behavior and the rationale for the
  4572 + behavior.
  4573 + </para>
  4574 + <para>
  4575 + <emphasis>Why were type errors made into warnings?</emphasis> When
  4576 + type checks were introduced into qpdf in the early days, it was
  4577 + expected that type errors would only occur as a result of
  4578 + programmer error. However, in practice, type errors would occur
  4579 + with malformed PDF files because of assumptions made in code,
  4580 + including code within the qpdf library and code written by library
  4581 + users. The most common case would be chaining calls to
  4582 + <function>getKey()</function> to access keys deep within a
  4583 + dictionary. In many cases, qpdf would be able to recover from
  4584 + these situations, but the old behavior often resulted in crashes
  4585 + rather than graceful recovery. For this reason, the errors were
  4586 + changed to warnings.
  4587 + </para>
  4588 + <para>
  4589 + <emphasis>Why even warn about type errors when the user can't
  4590 + usually do anything about them?</emphasis> Type warnings are
  4591 + extremely valuable during development. Since it's impossible to
  4592 + catch at compile time things like typos in dictionary key names or
  4593 + logic errors around what the structure of a PDF file might be, the
  4594 + presence of type warnings can save lots of developer time. They
  4595 + have also proven useful in exposing issues in qpdf itself that
  4596 + would have otherwise gone undetected.
  4597 + </para>
  4598 + <para>
  4599 + <emphasis>Can there be a type-safe
  4600 + <classname>QPDFObjectHandle</classname>?</emphasis> It would be
  4601 + great if <classname>QPDFObjectHandle</classname> could be more
  4602 + strongly typed so that you'd have to check that something was
  4603 + of a particular type before calling type-specific accessor
  4604 + methods. However, implementing this at this stage of the library's
  4605 + history would be quite difficult, and it would make the common
  4606 + pattern of drilling into an object no longer work. While it would
  4607 + be possible to have a parallel interface, it would create a lot of
  4608 + extra code. If qpdf were written in a language like Rust, an
  4609 + interface like this would make a lot of sense, but, for a variety
  4610 + of reasons, the qpdf API is consistent with other APIs of its
  4611 + time, relying on exception handling to catch errors. The
  4612 + underlying PDF objects are inherently not type-safe. Forcing
  4613 + stronger type safety in <classname>QPDFObjectHandle</classname>
  4614 + would ultimately cause a lot more code to have to be written and
  4615 + would likely make software that uses qpdf more brittle, and even so,
  4616 + checks would have to occur at runtime.
  4617 + </para>
  4618 + <para>
  4619 + <emphasis>Why do type errors sometimes raise
  4620 + exceptions?</emphasis> The way warnings work in qpdf requires a
  4621 + <classname>QPDF</classname> object to be associated with an object
  4622 + handle for a warning to be issued. It would be nice if this could
  4623 + be fixed, but it would require major changes to the API. Rather
  4624 + than throwing away these conditions, we convert them to
  4625 + exceptions. It's not that bad though. Since any object handle that
  4626 + was read from a file has an associated <classname>QPDF</classname>
  4627 + object, it would only be type errors on objects that were created
  4628 + explicitly that would cause exceptions, and in that case, type
  4629 + errors are much more likely to be the result of a coding error
  4630 + than invalid input.
  4631 + </para>
  4632 + <para>
  4633 + <emphasis>Why does the behavior of a type exception differ between
  4634 + the C and C++ API?</emphasis> There is no way to throw and catch
  4635 + exceptions in C short of something like
  4636 + <function>setjmp</function> and <function>longjmp</function>, and
  4637 + that approach is not portable across language barriers. Since the
  4638 + C API is often used from other languages, it's important to keep
  4639 + things as simple as possible. Starting in qpdf 10.5, exceptions
  4640 + that used to crash code using the C API will be written to stderr
  4641 + by default, and it is possible to register an error handler.
  4642 + There's no reason that the error handler can't simulate exception
  4643 + handling in some way, such as by using <function>setjmp</function>
  4644 + and <function>longjmp</function> or by setting some variable that
  4645 + can be checked after library calls are made. In retrospect, it
  4646 + might have been better if the C API object handle methods returned
  4647 + error codes like the other methods and set return values in
  4648 + passed-in pointers, but this would complicate both the
  4649 + implementation and the use of the library for a case that is
  4650 + actually quite rare and largely avoidable.
  4651 + </para>
  4652 + </sect1>
4563 4653 </chapter>
4564 4654 <chapter id="ref.linearization">
4565 4655 <title>Linearization</title>
... ... @@ -5127,6 +5217,20 @@ print &quot;\n&quot;;
5127 5217 <itemizedlist>
5128 5218 <listitem>
5129 5219 <para>
  5220 + Since qpdf version 8, using object accessor methods on an
  5221 + instance of <classname>QPDFObjectHandle</classname> may
  5222 + create warnings if the object is not of the expected type.
  5223 + These warnings now have an error code of
  5224 + <literal>qpdf_e_object</literal> instead of
  5225 + <literal>qpdf_e_damaged_pdf</literal>. Also, comments have
  5226 + been added to <filename>QPDFObjectHandle.hh</filename> to
  5227 + explain in more detail what the behavior is. See <xref
  5228 + linkend="ref.object-accessors"/> for a more in-depth
  5229 + discussion.
  5230 + </para>
  5231 + </listitem>
  5232 + <listitem>
  5233 + <para>
5130 5234 Add <function>qpdf_get_last_string_length</function> to the
5131 5235 C API to get the length of the last string that was
5132 5236 returned. This is needed to handle strings that contain
... ...
qpdf/qtest/qpdf.test
... ... @@ -273,12 +273,16 @@ $td->runtest("check final version",
273 273 show_ntests();
274 274 # ----------
275 275 $td->notify("--- Exceptions ---");
276   -$n_tests += 1;
  276 +$n_tests += 2;
277 277  
278 278 $td->runtest("check exception handling",
279 279 {$td->COMMAND => "test_driver 61 -"},
280 280 {$td->FILE => "exceptions.out", $td->EXIT_STATUS => 0},
281 281 $td->NORMALIZE_NEWLINES);
  282 +$td->runtest("check certain exception types",
  283 + {$td->COMMAND => "test_driver 81 -"},
  284 + {$td->STRING => "test 81 done\n", $td->EXIT_STATUS => 0},
  285 + $td->NORMALIZE_NEWLINES);
282 286  
283 287 show_ntests();
284 288 # ----------
... ... @@ -5303,6 +5307,7 @@ for (my $large = 0; $large < $nlarge; ++$large)
5303 5307 $td->NORMALIZE_NEWLINES);
5304 5308 unlink $file;
5305 5309 }
  5310 +show_ntests();
5306 5311 # ----------
5307 5312  
5308 5313 cleanup();
... ...
qpdf/test_driver.cc
... ... @@ -259,7 +259,7 @@ void runtest(int n, char const* filename1, char const* arg2)
259 259 pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(),
260 260 p, size);
261 261 }
262   - else if (n == 61)
  262 + else if ((n == 61) || (n == 81))
263 263 {
264 264 // Ignore filename argument entirely
265 265 }
... ... @@ -3049,6 +3049,19 @@ void runtest(int n, char const* filename1, char const* arg2)
3049 3049 w2.setQDFMode(true);
3050 3050 w2.write();
3051 3051 }
  3052 + else if (n == 81)
  3053 + {
  3054 + // Exercise that type errors get their own special type
  3055 + try
  3056 + {
  3057 + QPDFObjectHandle::newNull().getIntValue();
  3058 + assert(false);
  3059 + }
  3060 + catch (QPDFExc& e)
  3061 + {
  3062 + assert(e.getErrorCode() == qpdf_e_object);
  3063 + }
  3064 + }
3052 3065 else
3053 3066 {
3054 3067 throw std::runtime_error(std::string("invalid test ") +
... ...