Commit ea73bf72e0ff2577672eec405380456fa56bc6af

Authored by Jay Berkenbilt
1 parent 92613a1e

Further improvements to handling binary strings

ChangeLog
  1 +2021-12-19 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * C API: clarify documentation around string lengths. Add two new
  4 + methods: qpdf_oh_get_binary_string_value and
  5 + qpdf_oh_new_binary_string to make the need to handle the length
  6 + and data separately more explicit in cases in which the string
  7 + data may contain embedded null characters.
  8 +
1 2021-12-17 Jay Berkenbilt <ejb@ql.org> 9 2021-12-17 Jay Berkenbilt <ejb@ql.org>
2 10
3 * C API: simplify error handling for uncaught errors (never in a 11 * C API: simplify error handling for uncaught errors (never in a
include/qpdf/qpdf-c.h
@@ -61,10 +61,12 @@ @@ -61,10 +61,12 @@
61 * subsequent function calls, sometimes even to different 61 * subsequent function calls, sometimes even to different
62 * functions. If you want a string to last past the next qpdf call 62 * functions. If you want a string to last past the next qpdf call
63 * or after a call to qpdf_cleanup, you should make a copy of it. 63 * or after a call to qpdf_cleanup, you should make a copy of it.
64 - * It is possible for the internal string data to contain null  
65 - * characters. To handle that case, you call  
66 - * qpdf_get_last_string_length() to get the length of whatever  
67 - * string was just returned. 64 + *
  65 + * Since it is possible for a PDF string to contain null
  66 + * characters, a string returned by a function may also contain
  67 + * null characters if it originates from a PDF string. To handle that case,
  68 + * you call qpdf_get_last_string_length() to get the length of
  69 + * whatever string was just returned. See STRING FUNCTIONS below.
68 * 70 *
69 * Most functions defined here have obvious counterparts that are 71 * Most functions defined here have obvious counterparts that are
70 * methods to either QPDF or QPDFWriter. Please see comments in 72 * methods to either QPDF or QPDFWriter. Please see comments in
@@ -189,14 +191,6 @@ extern "C" { @@ -189,14 +191,6 @@ extern "C" {
189 QPDF_DLL 191 QPDF_DLL
190 void qpdf_cleanup(qpdf_data* qpdf); 192 void qpdf_cleanup(qpdf_data* qpdf);
191 193
192 - /* Return the length of the last string returned. This enables you  
193 - * to retrieve the entire string for cases in which a char*  
194 - * returned by one of the functions below points to a string with  
195 - * embedded null characters.  
196 - */  
197 - QPDF_DLL  
198 - size_t qpdf_get_last_string_length(qpdf_data qpdf);  
199 -  
200 /* ERROR REPORTING */ 194 /* ERROR REPORTING */
201 195
202 /* Returns 1 if there is an error condition. The error condition 196 /* Returns 1 if there is an error condition. The error condition
@@ -716,10 +710,29 @@ extern "C" { @@ -716,10 +710,29 @@ extern "C" {
716 QPDF_DLL 710 QPDF_DLL
717 char const* qpdf_oh_get_name(qpdf_data qpdf, qpdf_oh oh); 711 char const* qpdf_oh_get_name(qpdf_data qpdf, qpdf_oh oh);
718 712
  713 + /* Return the length of the last string returned. This enables you
  714 + * to retrieve the entire string for cases in which a char*
  715 + * returned by one of the functions below points to a string with
  716 + * embedded null characters. The function
  717 + * qpdf_oh_get_binary_string_value takes a length pointer, which
  718 + * can be useful if you are retrieving the value of a string that
  719 + * is expected to contain binary data, such as a checksum or
  720 + * document ID. It is always valid to call
  721 + * qpdf_get_last_string_length, but it is usually not necessary as
  722 + * C strings returned by the library are only expected to be able
  723 + * to contain null characters if their values originate from PDF
  724 + * strings in the input.
  725 + */
  726 + QPDF_DLL
  727 + size_t qpdf_get_last_string_length(qpdf_data qpdf);
  728 +
719 QPDF_DLL 729 QPDF_DLL
720 char const* qpdf_oh_get_string_value(qpdf_data qpdf, qpdf_oh oh); 730 char const* qpdf_oh_get_string_value(qpdf_data qpdf, qpdf_oh oh);
721 QPDF_DLL 731 QPDF_DLL
722 char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh); 732 char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh);
  733 + QPDF_DLL
  734 + char const* qpdf_oh_get_binary_string_value(
  735 + qpdf_data qpdf, qpdf_oh oh, size_t* length);
723 736
724 QPDF_DLL 737 QPDF_DLL
725 int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh); 738 int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh);
@@ -772,6 +785,12 @@ extern "C" { @@ -772,6 +785,12 @@ extern "C" {
772 qpdf_oh qpdf_oh_new_string(qpdf_data qpdf, char const* str); 785 qpdf_oh qpdf_oh_new_string(qpdf_data qpdf, char const* str);
773 QPDF_DLL 786 QPDF_DLL
774 qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str); 787 qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str);
  788 + /* Use qpdf_oh_new_binary_string for creating a string that may
  789 + * contain arbitrary binary data including embedded null characters.
  790 + */
  791 + QPDF_DLL
  792 + qpdf_oh qpdf_oh_new_binary_string(
  793 + qpdf_data qpdf, char const* str, size_t length);
775 QPDF_DLL 794 QPDF_DLL
776 qpdf_oh qpdf_oh_new_array(qpdf_data qpdf); 795 qpdf_oh qpdf_oh_new_array(qpdf_data qpdf);
777 QPDF_DLL 796 QPDF_DLL
libqpdf/qpdf-c.cc
@@ -1292,6 +1292,20 @@ char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh) @@ -1292,6 +1292,20 @@ char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh)
1292 }); 1292 });
1293 } 1293 }
1294 1294
  1295 +char const* qpdf_oh_get_binary_string_value(
  1296 + qpdf_data qpdf, qpdf_oh oh, size_t* length)
  1297 +{
  1298 + return do_with_oh<char const*>(
  1299 + qpdf, oh,
  1300 + return_T<char const*>(""),
  1301 + [qpdf, length](QPDFObjectHandle& o) {
  1302 + QTC::TC("qpdf", "qpdf-c called qpdf_oh_get_binary_string_value");
  1303 + qpdf->tmp_string = o.getStringValue();
  1304 + *length = qpdf->tmp_string.length();
  1305 + return qpdf->tmp_string.c_str();
  1306 + });
  1307 +}
  1308 +
1295 int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh) 1309 int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh)
1296 { 1310 {
1297 return do_with_oh<int>( 1311 return do_with_oh<int>(
@@ -1425,6 +1439,14 @@ qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str) @@ -1425,6 +1439,14 @@ qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str)
1425 return new_object(qpdf, QPDFObjectHandle::newUnicodeString(utf8_str)); 1439 return new_object(qpdf, QPDFObjectHandle::newUnicodeString(utf8_str));
1426 } 1440 }
1427 1441
  1442 +qpdf_oh qpdf_oh_new_binary_string(
  1443 + qpdf_data qpdf, char const* str, size_t length)
  1444 +{
  1445 + QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_binary_string");
  1446 + return new_object(
  1447 + qpdf, QPDFObjectHandle::newString(std::string(str, length)));
  1448 +}
  1449 +
1428 qpdf_oh qpdf_oh_new_array(qpdf_data qpdf) 1450 qpdf_oh qpdf_oh_new_array(qpdf_data qpdf)
1429 { 1451 {
1430 QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_array"); 1452 QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_array");
manual/release-notes.rst
@@ -46,6 +46,12 @@ For a detailed list of changes, please see the file @@ -46,6 +46,12 @@ For a detailed list of changes, please see the file
46 46
47 - C API Enhancements 47 - C API Enhancements
48 48
  49 + - Many thanks to M. Holger whose contributions have heavily
  50 + influenced these C API enhancements. His several suggestions,
  51 + pull requests, questions, and critical reading of documentation
  52 + and comments have resulted in significant usability improvements
  53 + to the C API.
  54 +
49 - Overhaul error handling for the object handle functions C API. 55 - Overhaul error handling for the object handle functions C API.
50 Some rare error conditions that would previously have caused a 56 Some rare error conditions that would previously have caused a
51 crash are now trapped and reported, and the functions that 57 crash are now trapped and reported, and the functions that
@@ -80,6 +86,10 @@ For a detailed list of changes, please see the file @@ -80,6 +86,10 @@ For a detailed list of changes, please see the file
80 86
81 - Add ``qpdf_oh_get_type_code`` and ``qpdf_oh_get_type_name``. 87 - Add ``qpdf_oh_get_type_code`` and ``qpdf_oh_get_type_name``.
82 88
  89 + - Add ``qpdf_oh_get_binary_string_value`` and
  90 + ``qpdf_oh_new_binary_string`` for making it easier to deal with
  91 + strings that contain embedded null characters.
  92 +
83 10.4.0: November 16, 2021 93 10.4.0: November 16, 2021
84 - Handling of Weak Cryptography Algorithms 94 - Handling of Weak Cryptography Algorithms
85 95
qpdf/qpdf-ctest.c
@@ -781,8 +781,17 @@ static void test27(char const* infile, @@ -781,8 +781,17 @@ static void test27(char const* infile,
781 assert(strcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null), 781 assert(strcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null),
782 "one") == 0); 782 "one") == 0);
783 assert(qpdf_get_last_string_length(qpdf) == 7); 783 assert(qpdf_get_last_string_length(qpdf) == 7);
  784 + /* memcmp adds a character to verify the trailing null */
784 assert(memcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null), 785 assert(memcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null),
785 - "one\000two", 7) == 0); 786 + "one\000two", 8) == 0);
  787 + size_t length = 0;
  788 + p_string_with_null = qpdf_oh_new_binary_string(qpdf, "potato\000salad", 12);
  789 + /* memcmp adds a character to verify the trailing null */
  790 + assert(memcmp(qpdf_oh_get_binary_string_value(
  791 + qpdf, p_string_with_null, &length),
  792 + "potato\000salad", 13) == 0);
  793 + assert(qpdf_get_last_string_length(qpdf) == 12);
  794 + assert(length == 12);
786 } 795 }
787 796
788 static void test28(char const* infile, 797 static void test28(char const* infile,
qpdf/qpdf.testcov
@@ -625,3 +625,5 @@ qpdf-c stream data buf set 1 @@ -625,3 +625,5 @@ qpdf-c stream data buf set 1
625 qpdf-c called qpdf_oh_get_page_content_data 0 625 qpdf-c called qpdf_oh_get_page_content_data 0
626 qpdf-c called qpdf_oh_replace_stream_data 0 626 qpdf-c called qpdf_oh_replace_stream_data 0
627 qpdf-c silence oh errors 0 627 qpdf-c silence oh errors 0
  628 +qpdf-c called qpdf_oh_get_binary_string_value 0
  629 +qpdf-c called qpdf_oh_new_binary_string 0