Commit ea73bf72e0ff2577672eec405380456fa56bc6af

Authored by Jay Berkenbilt
1 parent 92613a1e

Further improvements to handling binary strings

ChangeLog
  1 +2021-12-19 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * C API: clarify documentation around string lengths. Add two new
  4 + methods: qpdf_oh_get_binary_string_value and
  5 + qpdf_oh_new_binary_string to make the need to handle the length
  6 + and data separately more explicit in cases in which the string
  7 + data may contain embedded null characters.
  8 +
1 9 2021-12-17 Jay Berkenbilt <ejb@ql.org>
2 10  
3 11 * C API: simplify error handling for uncaught errors (never in a
... ...
include/qpdf/qpdf-c.h
... ... @@ -61,10 +61,12 @@
61 61 * subsequent function calls, sometimes even to different
62 62 * functions. If you want a string to last past the next qpdf call
63 63 * or after a call to qpdf_cleanup, you should make a copy of it.
64   - * It is possible for the internal string data to contain null
65   - * characters. To handle that case, you call
66   - * qpdf_get_last_string_length() to get the length of whatever
67   - * string was just returned.
  64 + *
  65 + * Since it is possible for a PDF string to contain null
  66 + * characters, the value returned by a function may also contain
  67 + * null characters if it originates from a PDF string. To handle that case,
  68 + * you call qpdf_get_last_string_length() to get the length of
  69 + * whatever string was just returned. See STRING FUNCTIONS below.
68 70 *
69 71 * Most functions defined here have obvious counterparts that are
70 72 * methods to either QPDF or QPDFWriter. Please see comments in
... ... @@ -189,14 +191,6 @@ extern "C" {
189 191 QPDF_DLL
190 192 void qpdf_cleanup(qpdf_data* qpdf);
191 193  
192   - /* Return the length of the last string returned. This enables you
193   - * to retrieve the entire string for cases in which a char*
194   - * returned by one of the functions below points to a string with
195   - * embedded null characters.
196   - */
197   - QPDF_DLL
198   - size_t qpdf_get_last_string_length(qpdf_data qpdf);
199   -
200 194 /* ERROR REPORTING */
201 195  
202 196 /* Returns 1 if there is an error condition. The error condition
... ... @@ -716,10 +710,29 @@ extern "C" {
716 710 QPDF_DLL
717 711 char const* qpdf_oh_get_name(qpdf_data qpdf, qpdf_oh oh);
718 712  
  713 + /* Return the length of the last string returned. This enables you
  714 + * to retrieve the entire string for cases in which a char*
  715 + * returned by one of the functions below points to a string with
  716 + * embedded null characters. The function
  717 + * qpdf_oh_get_binary_string_value takes a length pointer, which
  718 + * can be useful if you are retrieving the value of a string that
  719 + * is expected to contain binary data, such as a checksum or
  720 + * document ID. It is always valid to call
  721 + * qpdf_get_last_string_length, but it is usually not necessary as
  722 + * C strings returned by the library are only expected to be able
  723 + * to contain null characters if their values originate from PDF
  724 + * strings in the input.
  725 + */
  726 + QPDF_DLL
  727 + size_t qpdf_get_last_string_length(qpdf_data qpdf);
  728 +
719 729 QPDF_DLL
720 730 char const* qpdf_oh_get_string_value(qpdf_data qpdf, qpdf_oh oh);
721 731 QPDF_DLL
722 732 char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh);
  733 + QPDF_DLL
  734 + char const* qpdf_oh_get_binary_string_value(
  735 + qpdf_data qpdf, qpdf_oh oh, size_t* length);
723 736  
724 737 QPDF_DLL
725 738 int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh);
... ... @@ -772,6 +785,12 @@ extern "C" {
772 785 qpdf_oh qpdf_oh_new_string(qpdf_data qpdf, char const* str);
773 786 QPDF_DLL
774 787 qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str);
  788 + /* Use qpdf_oh_new_binary_string for creating a string that may
  789 + * contain arbitrary binary data including embedded null characters.
  790 + */
  791 + QPDF_DLL
  792 + qpdf_oh qpdf_oh_new_binary_string(
  793 + qpdf_data qpdf, char const* str, size_t length);
775 794 QPDF_DLL
776 795 qpdf_oh qpdf_oh_new_array(qpdf_data qpdf);
777 796 QPDF_DLL
... ...
libqpdf/qpdf-c.cc
... ... @@ -1292,6 +1292,20 @@ char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh)
1292 1292 });
1293 1293 }
1294 1294  
  1295 +char const* qpdf_oh_get_binary_string_value(
  1296 + qpdf_data qpdf, qpdf_oh oh, size_t* length)
  1297 +{
  1298 + return do_with_oh<char const*>(
  1299 + qpdf, oh,
  1300 + return_T<char const*>(""),
  1301 + [qpdf, length](QPDFObjectHandle& o) {
  1302 + QTC::TC("qpdf", "qpdf-c called qpdf_oh_get_binary_string_value");
  1303 + qpdf->tmp_string = o.getStringValue();
  1304 + *length = qpdf->tmp_string.length();
  1305 + return qpdf->tmp_string.c_str();
  1306 + });
  1307 +}
  1308 +
1295 1309 int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh)
1296 1310 {
1297 1311 return do_with_oh<int>(
... ... @@ -1425,6 +1439,14 @@ qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str)
1425 1439 return new_object(qpdf, QPDFObjectHandle::newUnicodeString(utf8_str));
1426 1440 }
1427 1441  
  1442 +qpdf_oh qpdf_oh_new_binary_string(
  1443 + qpdf_data qpdf, char const* str, size_t length)
  1444 +{
  1445 + QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_binary_string");
  1446 + return new_object(
  1447 + qpdf, QPDFObjectHandle::newString(std::string(str, length)));
  1448 +}
  1449 +
1428 1450 qpdf_oh qpdf_oh_new_array(qpdf_data qpdf)
1429 1451 {
1430 1452 QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_array");
... ...
manual/release-notes.rst
... ... @@ -46,6 +46,12 @@ For a detailed list of changes, please see the file
46 46  
47 47 - C API Enhancements
48 48  
  49 + - Many thanks to M. Holger whose contributions have heavily
  50 + influenced these C API enhancements. His several suggestions,
  51 + pull requests, questions, and critical reading of documentation
  52 + and comments have resulted in significant usability improvements
  53 + to the C API.
  54 +
49 55 - Overhaul error handling for the object handle functions C API.
50 56 Some rare error conditions that would previously have caused a
51 57 crash are now trapped and reported, and the functions that
... ... @@ -80,6 +86,10 @@ For a detailed list of changes, please see the file
80 86  
81 87 - Add ``qpdf_oh_get_type_code`` and ``qpdf_oh_get_type_name``.
82 88  
  89 + - Add ``qpdf_oh_get_binary_string_value`` and
  90 + ``qpdf_oh_new_binary_string`` for making it easier to deal with
  91 + strings that contain embedded null characters.
  92 +
83 93 10.4.0: November 16, 2021
84 94 - Handling of Weak Cryptography Algorithms
85 95  
... ...
qpdf/qpdf-ctest.c
... ... @@ -781,8 +781,17 @@ static void test27(char const* infile,
781 781 assert(strcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null),
782 782 "one") == 0);
783 783 assert(qpdf_get_last_string_length(qpdf) == 7);
  784 + /* memcmp adds a character to verify the trailing null */
784 785 assert(memcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null),
785   - "one\000two", 7) == 0);
  786 + "one\000two", 8) == 0);
  787 + size_t length = 0;
  788 + p_string_with_null = qpdf_oh_new_binary_string(qpdf, "potato\000salad", 12);
  789 + /* memcmp adds a character to verify the trailing null */
  790 + assert(memcmp(qpdf_oh_get_binary_string_value(
  791 + qpdf, p_string_with_null, &length),
  792 + "potato\000salad", 13) == 0);
  793 + assert(qpdf_get_last_string_length(qpdf) == 12);
  794 + assert(length == 12);
786 795 }
787 796  
788 797 static void test28(char const* infile,
... ...
qpdf/qpdf.testcov
... ... @@ -625,3 +625,5 @@ qpdf-c stream data buf set 1
625 625 qpdf-c called qpdf_oh_get_page_content_data 0
626 626 qpdf-c called qpdf_oh_replace_stream_data 0
627 627 qpdf-c silence oh errors 0
  628 +qpdf-c called qpdf_oh_get_binary_string_value 0
  629 +qpdf-c called qpdf_oh_new_binary_string 0
... ...