Commit 3340dbe9761ef35d580d77a73e17d204579624f1

Authored by Jay Berkenbilt
1 parent b2b2a175

Use a specific error code for type warnings and clarify docs

README-maintainer
@@ -132,7 +132,9 @@ RELEASE PREPARATION @@ -132,7 +132,9 @@ RELEASE PREPARATION
132 132
133 * Check all open issues and pull requests in github and the 133 * Check all open issues and pull requests in github and the
134 sourceforge trackers. See ~/scripts/github-issues. Don't forget pull 134 sourceforge trackers. See ~/scripts/github-issues. Don't forget pull
135 - requests. 135 + requests. Note: If the location for reporting issues changes, do a
  136 + careful check of documentation and code to make sure any comments
  137 + that include the issue creation URL are updated.
136 138
137 * Check `TODO` file to make sure all planned items for the release are 139 * Check `TODO` file to make sure all planned items for the release are
138 done or retargeted. 140 done or retargeted.
cSpell.json
@@ -143,6 +143,7 @@ @@ -143,6 +143,7 @@
143 "hcryptprov", 143 "hcryptprov",
144 "hdict", 144 "hdict",
145 "hoffmann", 145 "hoffmann",
  146 + "holger",
146 "hosoda", 147 "hosoda",
147 "htcondor", 148 "htcondor",
148 "htdocs", 149 "htdocs",
@@ -205,6 +206,7 @@ @@ -205,6 +206,7 @@
205 "linp", 206 "linp",
206 "listitem", 207 "listitem",
207 "ljpeg", 208 "ljpeg",
  209 + "longjmp",
208 "lpstr", 210 "lpstr",
209 "lqpdf", 211 "lqpdf",
210 "lssl", 212 "lssl",
@@ -367,6 +369,7 @@ @@ -367,6 +369,7 @@
367 "scarff", 369 "scarff",
368 "seekable", 370 "seekable",
369 "segfaulting", 371 "segfaulting",
  372 + "setjmp",
370 "sharedresources", 373 "sharedresources",
371 "smatch", 374 "smatch",
372 "softlink", 375 "softlink",
include/qpdf/Constants.h
@@ -38,6 +38,7 @@ enum qpdf_error_code_e @@ -38,6 +38,7 @@ enum qpdf_error_code_e
38 qpdf_e_password, /* incorrect password for encrypted file */ 38 qpdf_e_password, /* incorrect password for encrypted file */
39 qpdf_e_damaged_pdf, /* syntax errors or other damage in PDF */ 39 qpdf_e_damaged_pdf, /* syntax errors or other damage in PDF */
40 qpdf_e_pages, /* erroneous or unsupported pages structure */ 40 qpdf_e_pages, /* erroneous or unsupported pages structure */
  41 + qpdf_e_object, /* type/bounds errors accessing objects */
41 }; 42 };
42 43
43 /* Write Parameters. See QPDFWriter.hh for details. */ 44 /* Write Parameters. See QPDFWriter.hh for details. */
include/qpdf/QPDFObjectHandle.hh
@@ -609,15 +609,65 @@ class QPDFObjectHandle @@ -609,15 +609,65 @@ class QPDFObjectHandle
609 QPDF_DLL 609 QPDF_DLL
610 bool hasObjectDescription(); 610 bool hasObjectDescription();
611 611
612 - // Accessor methods. If an accessor method that is valid for only  
613 - // a particular object type is called on an object of the wrong  
614 - // type, an exception is thrown. 612 + // Accessor methods
  613 + //
  614 + // (Note: this comment is referenced in qpdf-c.h and the manual.)
  615 + //
  616 + // In PDF files, objects have specific types, but there is nothing
  617 + // that prevents PDF files from containing objects of types that
  618 + // aren't expected by the specification. Many of the accessors
  619 + // here expect objects of a particular type. Prior to qpdf 8,
  620 + // calling an accessor on a method of the wrong type, such as
  621 + // trying to get a dictionary key from an array, trying to get the
  622 + // string value of a number, etc., would throw an exception, but
  623 + // since qpdf 8, qpdf issues a warning and recovers using the
  624 + // following behavior:
  625 + //
  626 + // * Requesting a value of the wrong type (int value from string,
  627 + // array item from a scalar or dictionary, etc.) will return a
  628 + // zero-like value for that type: false for boolean, 0 for
  629 + // number, the empty string for string, or the null object for
  630 + // an object handle.
  631 + //
  632 + // * Accessing an array item that is out of bounds will return a
  633 + // null object.
  634 + //
  635 + // * Attempts to mutate an object of the wrong type (e.g.,
  636 + // attempting to add a dictionary key to a scalar or array) will
  637 + // be ignored.
  638 + //
  639 + // When any of these fallback behaviors are used, qpdf issues a
  640 + // warning. Starting in qpdf 10.5, these warnings have the error
  641 + // code qpdf_e_object. Prior to 10.5, they had the error code
  642 + // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with
  643 + // a QPDF object (as is the case for all objects whose origin was
  644 + // a PDF file), the warning is issued using the normal warning
  645 + // mechanism (as described in QPDF.hh), making it possible to
  646 + // suppress or otherwise detect them. If the QPDFObjectHandle is
  647 + // not associated with a QPDF object (meaning it was created
  648 + // programmatically), an exception will be thrown.
  649 + //
  650 + // The way to avoid getting any type warnings or exceptions, even
  651 + // when working with malformed PDF files, is to always check the
  652 + // type of a QPDFObjectHandle before accessing it (for example,
  653 + // make sure that isString() returns true before calling
  654 + // getStringValue()) and to always be sure that any array indices
  655 + // are in bounds.
  656 + //
  657 + // For additional discussion and rationale for this behavior, see
  658 + // the section in the QPDF manual entitled "Object Accessor
  659 + // Methods".
615 660
616 // Methods for bool objects 661 // Methods for bool objects
617 QPDF_DLL 662 QPDF_DLL
618 bool getBoolValue(); 663 bool getBoolValue();
619 664
620 - // Methods for integer objects 665 + // Methods for integer objects. Note: if an integer value is too
  666 + // big (too far away from zero in either direction) to fit in the
  667 + // requested return type, the maximum or minimum value for that
  668 + // return type may be returned. For example, on a system with
  669 + // 32-bit int, a numeric object with a value of 2^40 (or anything
  670 + // too big for 32 bits) will be returned as INT_MAX.
621 QPDF_DLL 671 QPDF_DLL
622 long long getIntValue(); 672 long long getIntValue();
623 QPDF_DLL 673 QPDF_DLL
libqpdf/QPDFObjectHandle.cc
@@ -3048,23 +3048,17 @@ void @@ -3048,23 +3048,17 @@ void
3048 QPDFObjectHandle::typeWarning(char const* expected_type, 3048 QPDFObjectHandle::typeWarning(char const* expected_type,
3049 std::string const& warning) 3049 std::string const& warning)
3050 { 3050 {
3051 - QPDF* context = 0; 3051 + QPDF* context = nullptr;
3052 std::string description; 3052 std::string description;
3053 dereference(); 3053 dereference();
3054 - if (this->obj->getDescription(context, description))  
3055 - {  
3056 - warn(context,  
3057 - QPDFExc(  
3058 - qpdf_e_damaged_pdf, 3054 + this->obj->getDescription(context, description);
  3055 + // Null context handled by warn
  3056 + warn(context,
  3057 + QPDFExc(qpdf_e_object,
3059 "", description, 0, 3058 "", description, 0,
3060 std::string("operation for ") + expected_type + 3059 std::string("operation for ") + expected_type +
3061 " attempted on object of type " + 3060 " attempted on object of type " +
3062 getTypeName() + ": " + warning)); 3061 getTypeName() + ": " + warning));
3063 - }  
3064 - else  
3065 - {  
3066 - assertType(expected_type, false);  
3067 - }  
3068 } 3062 }
3069 3063
3070 void 3064 void
@@ -3091,7 +3085,12 @@ QPDFObjectHandle::warnIfPossible(std::string const& warning, @@ -3091,7 +3085,12 @@ QPDFObjectHandle::warnIfPossible(std::string const& warning,
3091 void 3085 void
3092 QPDFObjectHandle::objectWarning(std::string const& warning) 3086 QPDFObjectHandle::objectWarning(std::string const& warning)
3093 { 3087 {
3094 - warnIfPossible(warning, true); 3088 + QPDF* context = nullptr;
  3089 + std::string description;
  3090 + dereference();
  3091 + this->obj->getDescription(context, description);
  3092 + // Null context handled by warn
  3093 + warn(context, QPDFExc(qpdf_e_object, "", description, 0, warning));
3095 } 3094 }
3096 3095
3097 void 3096 void
manual/qpdf-manual.xml
@@ -4560,6 +4560,96 @@ outfile.pdf</option> @@ -4560,6 +4560,96 @@ outfile.pdf</option>
4560 filtered stream contents to a given pipeline. 4560 filtered stream contents to a given pipeline.
4561 </para> 4561 </para>
4562 </sect1> 4562 </sect1>
  4563 + <sect1 id="ref.object-accessors">
  4564 + <!-- This section is referenced in QPDFObjectHandle.hh -->
  4565 + <title>Object Accessor Methods</title>
  4566 + <para>
  4567 + For general information about how to access instances of
  4568 + <classname>QPDFObjectHandle</classname>, please see the comments
  4569 + in <filename>QPDFObjectHandle.hh</filename>. Search for
  4570 + &ldquo;Accessor methods&rdquo;. This section provides a more
  4571 + in-depth discussion of the behavior and the rationale for the
  4572 + behavior.
  4573 + </para>
  4574 + <para>
  4575 + <emphasis>Why were type errors made into warnings?</emphasis> When
  4576 + type checks were introduced into qpdf in the early days, it was
  4577 + expected that type errors would only occur as a result of
  4578 + programmer error. However, in practice, type errors would occur
  4579 + with malformed PDF files because of assumptions made in code,
  4580 + including code within the qpdf library and code written by library
  4581 + users. The most common case would be chaining calls to
  4582 + <function>getKey()</function> to access keys deep within a
  4583 + dictionary. In many cases, qpdf would be able to recover from
  4584 + these situations, but the old behavior often resulted in crashes
  4585 + rather than graceful recovery. For this reason, the errors were
  4586 + changed to warnings.
  4587 + </para>
  4588 + <para>
  4589 + <emphasis>Why even warn about type errors when the user can't
  4590 + usually do anything about them?</emphasis> Type warnings are
  4591 + extremely valuable during development. Since it's impossible to
  4592 + catch at compile time things like typos in dictionary key names or
  4593 + logic errors around what the structure of a PDF file might be, the
  4594 + presence of type warnings can save lots of developer time. They
  4595 + have also proven useful in exposing issues in qpdf itself that
  4596 + would have otherwise gone undetected.
  4597 + </para>
  4598 + <para>
  4599 + <emphasis>Can there be a type-safe
  4600 + <classname>QPDFObjectHandle</classname>?</emphasis> It would be
  4601 + great if <classname>QPDFObjectHandle</classname> could be more
  4602 + strongly typed so that you'd have to have check that something was
  4603 + of a particular type before calling type-specific accessor
  4604 + methods. However, implementing this at this stage of the library's
  4605 + history would be quite difficult, and it would make a the common
  4606 + pattern of drilling into an object no longer work. While it would
  4607 + be possible to have a parallel interface, it would create a lot of
  4608 + extra code. If qpdf were written in a language like rust, an
  4609 + interface like this would make a lot of sense, but, for a variety
  4610 + of reasons, the qpdf API is consistent with other APIs of its
  4611 + time, relying on exception handling to catch errors. The
  4612 + underlying PDF objects are inherently not type-safe. Forcing
  4613 + stronger type safety in <classname>QPDFObjectHandle</classname>
  4614 + would ultimately cause a lot more code to have to be written and
  4615 + would like make software that uses qpdf more brittle, and even so,
  4616 + checks would have to occur at runtime.
  4617 + </para>
  4618 + <para>
  4619 + <emphasis>Why do type errors sometimes raise
  4620 + exceptions?</emphasis> The way warnings work in qpdf requires a
  4621 + <classname>QPDF</classname> object to be associated with an object
  4622 + handle for a warning to be issued. It would be nice if this could
  4623 + be fixed, but it would require major changes to the API. Rather
  4624 + than throwing away these conditions, we convert them to
  4625 + exceptions. It's not that bad though. Since any object handle that
  4626 + was read from a file has an associated <classname>QPDF</classname>
  4627 + object, it would only be type errors on objects that were created
  4628 + explicitly that would cause exceptions, and in that case, type
  4629 + errors are much more likely to be the result of a coding error
  4630 + than invalid input.
  4631 + </para>
  4632 + <para>
  4633 + <emphasis>Why does the behavior of a type exception differ between
  4634 + the C and C++ API?</emphasis> There is no way to throw and catch
  4635 + exceptions in C short of something like
  4636 + <function>setjmp</function> and <function>longjmp</function>, and
  4637 + that approach is not portable across language barriers. Since the
  4638 + C API is often used from other languages, it's important to keep
  4639 + things as simple as possible. Starting in qpdf 10.5, exceptions
  4640 + that used to crash code using the C API will be written to stderr
  4641 + by default, and it is possible to register an error handler.
  4642 + There's no reason that the error handler can't simulate exception
  4643 + handling in some way, such as by using <function>setjmp</function>
  4644 + and <function>longjmp</function> or by setting some variable that
  4645 + can be checked after library calls are made. In retrospect, it
  4646 + might have been better if the C API object handle methods returned
  4647 + error codes like the other methods and set return values in
  4648 + passed-in pointers, but this would complicate both the
  4649 + implementation and the use of the library for a case that is
  4650 + actually quite rare and largely avoidable.
  4651 + </para>
  4652 + </sect1>
4563 </chapter> 4653 </chapter>
4564 <chapter id="ref.linearization"> 4654 <chapter id="ref.linearization">
4565 <title>Linearization</title> 4655 <title>Linearization</title>
@@ -5127,6 +5217,20 @@ print &quot;\n&quot;; @@ -5127,6 +5217,20 @@ print &quot;\n&quot;;
5127 <itemizedlist> 5217 <itemizedlist>
5128 <listitem> 5218 <listitem>
5129 <para> 5219 <para>
  5220 + Since qpdf version 8, using object accessor methods on an
  5221 + instance of <classname>QPDFObjectHandle</classname> may
  5222 + create warnings if the object is not of the expected type.
  5223 + These warnings now have an error code of
  5224 + <literal>qpdf_e_object</literal> instead of
  5225 + <literal>qpdf_e_damaged_pdf</literal>. Also, comments have
  5226 + been added to <filename>QPDFObjectHandle.hh</filename> to
  5227 + explain in more detail what the behavior is. See <xref
  5228 + linkend="ref.object-accessors"/> for a more in-depth
  5229 + discussion.
  5230 + </para>
  5231 + </listitem>
  5232 + <listitem>
  5233 + <para>
5130 Add <function>qpdf_get_last_string_length</function> to the 5234 Add <function>qpdf_get_last_string_length</function> to the
5131 C API to get the length of the last string that was 5235 C API to get the length of the last string that was
5132 returned. This is needed to handle strings that contain 5236 returned. This is needed to handle strings that contain
qpdf/qtest/qpdf.test
@@ -273,12 +273,16 @@ $td-&gt;runtest(&quot;check final version&quot;, @@ -273,12 +273,16 @@ $td-&gt;runtest(&quot;check final version&quot;,
273 show_ntests(); 273 show_ntests();
274 # ---------- 274 # ----------
275 $td->notify("--- Exceptions ---"); 275 $td->notify("--- Exceptions ---");
276 -$n_tests += 1; 276 +$n_tests += 2;
277 277
278 $td->runtest("check exception handling", 278 $td->runtest("check exception handling",
279 {$td->COMMAND => "test_driver 61 -"}, 279 {$td->COMMAND => "test_driver 61 -"},
280 {$td->FILE => "exceptions.out", $td->EXIT_STATUS => 0}, 280 {$td->FILE => "exceptions.out", $td->EXIT_STATUS => 0},
281 $td->NORMALIZE_NEWLINES); 281 $td->NORMALIZE_NEWLINES);
  282 +$td->runtest("check certain exception types",
  283 + {$td->COMMAND => "test_driver 81 -"},
  284 + {$td->STRING => "test 81 done\n", $td->EXIT_STATUS => 0},
  285 + $td->NORMALIZE_NEWLINES);
282 286
283 show_ntests(); 287 show_ntests();
284 # ---------- 288 # ----------
@@ -5303,6 +5307,7 @@ for (my $large = 0; $large &lt; $nlarge; ++$large) @@ -5303,6 +5307,7 @@ for (my $large = 0; $large &lt; $nlarge; ++$large)
5303 $td->NORMALIZE_NEWLINES); 5307 $td->NORMALIZE_NEWLINES);
5304 unlink $file; 5308 unlink $file;
5305 } 5309 }
  5310 +show_ntests();
5306 # ---------- 5311 # ----------
5307 5312
5308 cleanup(); 5313 cleanup();
qpdf/test_driver.cc
@@ -259,7 +259,7 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -259,7 +259,7 @@ void runtest(int n, char const* filename1, char const* arg2)
259 pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(), 259 pdf.processMemoryFile((std::string(filename1) + ".pdf").c_str(),
260 p, size); 260 p, size);
261 } 261 }
262 - else if (n == 61) 262 + else if ((n == 61) || (n == 81))
263 { 263 {
264 // Ignore filename argument entirely 264 // Ignore filename argument entirely
265 } 265 }
@@ -3049,6 +3049,19 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -3049,6 +3049,19 @@ void runtest(int n, char const* filename1, char const* arg2)
3049 w2.setQDFMode(true); 3049 w2.setQDFMode(true);
3050 w2.write(); 3050 w2.write();
3051 } 3051 }
  3052 + else if (n == 81)
  3053 + {
  3054 + // Exercise that type errors get their own special type
  3055 + try
  3056 + {
  3057 + QPDFObjectHandle::newNull().getIntValue();
  3058 + assert(false);
  3059 + }
  3060 + catch (QPDFExc& e)
  3061 + {
  3062 + assert(e.getErrorCode() == qpdf_e_object);
  3063 + }
  3064 + }
3052 else 3065 else
3053 { 3066 {
3054 throw std::runtime_error(std::string("invalid test ") + 3067 throw std::runtime_error(std::string("invalid test ") +