Commit ea73bf72e0ff2577672eec405380456fa56bc6af
1 parent
92613a1e
Further improvements to handling binary strings
Showing
6 changed files
with
83 additions
and
13 deletions
ChangeLog
| 1 | +2021-12-19 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * C API: clarify documentation around string lengths. Add two new | |
| 4 | + methods: qpdf_oh_get_binary_string_value and | |
| 5 | + qpdf_oh_new_binary_string to make the need to handle the length | |
| 6 | + and data separately more explicit in cases in which the string | |
| 7 | + data may contain embedded null characters. | |
| 8 | + | |
| 1 | 9 | 2021-12-17 Jay Berkenbilt <ejb@ql.org> |
| 2 | 10 | |
| 3 | 11 | * C API: simplify error handling for uncaught errors (never in a | ... | ... |
include/qpdf/qpdf-c.h
| ... | ... | @@ -61,10 +61,12 @@ |
| 61 | 61 | * subsequent function calls, sometimes even to different |
| 62 | 62 | * functions. If you want a string to last past the next qpdf call |
| 63 | 63 | * or after a call to qpdf_cleanup, you should make a copy of it. |
| 64 | - * It is possible for the internal string data to contain null | |
| 65 | - * characters. To handle that case, you call | |
| 66 | - * qpdf_get_last_string_length() to get the length of whatever | |
| 67 | - * string was just returned. | |
| 64 | + * | |
| 65 | + * Since it is possible for a PDF string to contain null | |
| 66 | + * characters, the data returned by a function that reads from a | |
| 67 | + * PDF string may also contain null characters. To handle that case, | |
| 68 | + * you call qpdf_get_last_string_length() to get the length of | |
| 69 | + * whatever string was just returned. See STRING FUNCTIONS below. | |
| 68 | 70 | * |
| 69 | 71 | * Most functions defined here have obvious counterparts that are |
| 70 | 72 | * methods to either QPDF or QPDFWriter. Please see comments in |
| ... | ... | @@ -189,14 +191,6 @@ extern "C" { |
| 189 | 191 | QPDF_DLL |
| 190 | 192 | void qpdf_cleanup(qpdf_data* qpdf); |
| 191 | 193 | |
| 192 | - /* Return the length of the last string returned. This enables you | |
| 193 | - * to retrieve the entire string for cases in which a char* | |
| 194 | - * returned by one of the functions below points to a string with | |
| 195 | - * embedded null characters. | |
| 196 | - */ | |
| 197 | - QPDF_DLL | |
| 198 | - size_t qpdf_get_last_string_length(qpdf_data qpdf); | |
| 199 | - | |
| 200 | 194 | /* ERROR REPORTING */ |
| 201 | 195 | |
| 202 | 196 | /* Returns 1 if there is an error condition. The error condition |
| ... | ... | @@ -716,10 +710,29 @@ extern "C" { |
| 716 | 710 | QPDF_DLL |
| 717 | 711 | char const* qpdf_oh_get_name(qpdf_data qpdf, qpdf_oh oh); |
| 718 | 712 | |
| 713 | + /* Return the length of the last string returned. This enables you | |
| 714 | + * to retrieve the entire string for cases in which a char* | |
| 715 | + * returned by one of the functions below points to a string with | |
| 716 | + * embedded null characters. The function | |
| 717 | + * qpdf_oh_get_binary_string_value takes a length pointer, which | |
| 718 | + * can be useful if you are retrieving the value of a string that | |
| 719 | + * is expected to contain binary data, such as a checksum or | |
| 720 | + * document ID. It is always valid to call | |
| 721 | + * qpdf_get_last_string_length, but it is usually not necessary as | |
| 722 | + * C strings returned by the library are only expected to be able | |
| 723 | + * to contain null characters if their values originate from PDF | |
| 724 | + * strings in the input. | |
| 725 | + */ | |
| 726 | + QPDF_DLL | |
| 727 | + size_t qpdf_get_last_string_length(qpdf_data qpdf); | |
| 728 | + | |
| 719 | 729 | QPDF_DLL |
| 720 | 730 | char const* qpdf_oh_get_string_value(qpdf_data qpdf, qpdf_oh oh); |
| 721 | 731 | QPDF_DLL |
| 722 | 732 | char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh); |
| 733 | + QPDF_DLL | |
| 734 | + char const* qpdf_oh_get_binary_string_value( | |
| 735 | + qpdf_data qpdf, qpdf_oh oh, size_t* length); | |
| 723 | 736 | |
| 724 | 737 | QPDF_DLL |
| 725 | 738 | int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh); |
| ... | ... | @@ -772,6 +785,12 @@ extern "C" { |
| 772 | 785 | qpdf_oh qpdf_oh_new_string(qpdf_data qpdf, char const* str); |
| 773 | 786 | QPDF_DLL |
| 774 | 787 | qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str); |
| 788 | + /* Use qpdf_oh_new_binary_string for creating a string that may | |
| 789 | + * contain arbitrary binary data including embedded null characters. | |
| 790 | + */ | |
| 791 | + QPDF_DLL | |
| 792 | + qpdf_oh qpdf_oh_new_binary_string( | |
| 793 | + qpdf_data qpdf, char const* str, size_t length); | |
| 775 | 794 | QPDF_DLL |
| 776 | 795 | qpdf_oh qpdf_oh_new_array(qpdf_data qpdf); |
| 777 | 796 | QPDF_DLL | ... | ... |
libqpdf/qpdf-c.cc
| ... | ... | @@ -1292,6 +1292,20 @@ char const* qpdf_oh_get_utf8_value(qpdf_data qpdf, qpdf_oh oh) |
| 1292 | 1292 | }); |
| 1293 | 1293 | } |
| 1294 | 1294 | |
| 1295 | +char const* qpdf_oh_get_binary_string_value( | |
| 1296 | + qpdf_data qpdf, qpdf_oh oh, size_t* length) | |
| 1297 | +{ | |
| 1298 | + return do_with_oh<char const*>( | |
| 1299 | + qpdf, oh, | |
| 1300 | + return_T<char const*>(""), | |
| 1301 | + [qpdf, length](QPDFObjectHandle& o) { | |
| 1302 | + QTC::TC("qpdf", "qpdf-c called qpdf_oh_get_binary_string_value"); | |
| 1303 | + qpdf->tmp_string = o.getStringValue(); | |
| 1304 | + *length = qpdf->tmp_string.length(); | |
| 1305 | + return qpdf->tmp_string.c_str(); | |
| 1306 | + }); | |
| 1307 | +} | |
| 1308 | + | |
| 1295 | 1309 | int qpdf_oh_get_array_n_items(qpdf_data qpdf, qpdf_oh oh) |
| 1296 | 1310 | { |
| 1297 | 1311 | return do_with_oh<int>( |
| ... | ... | @@ -1425,6 +1439,14 @@ qpdf_oh qpdf_oh_new_unicode_string(qpdf_data qpdf, char const* utf8_str) |
| 1425 | 1439 | return new_object(qpdf, QPDFObjectHandle::newUnicodeString(utf8_str)); |
| 1426 | 1440 | } |
| 1427 | 1441 | |
| 1442 | +qpdf_oh qpdf_oh_new_binary_string( | |
| 1443 | + qpdf_data qpdf, char const* str, size_t length) | |
| 1444 | +{ | |
| 1445 | + QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_binary_string"); | |
| 1446 | + return new_object( | |
| 1447 | + qpdf, QPDFObjectHandle::newString(std::string(str, length))); | |
| 1448 | +} | |
| 1449 | + | |
| 1428 | 1450 | qpdf_oh qpdf_oh_new_array(qpdf_data qpdf) |
| 1429 | 1451 | { |
| 1430 | 1452 | QTC::TC("qpdf", "qpdf-c called qpdf_oh_new_array"); | ... | ... |
manual/release-notes.rst
| ... | ... | @@ -46,6 +46,12 @@ For a detailed list of changes, please see the file |
| 46 | 46 | |
| 47 | 47 | - C API Enhancements |
| 48 | 48 | |
| 49 | + - Many thanks to M. Holger whose contributions have heavily | |
| 50 | + influenced these C API enhancements. His several suggestions, | |
| 51 | + pull requests, questions, and critical reading of documentation | |
| 52 | + and comments have resulted in significant usability improvements | |
| 53 | + to the C API. | |
| 54 | + | |
| 49 | 55 | - Overhaul error handling for the object handle functions C API. |
| 50 | 56 | Some rare error conditions that would previously have caused a |
| 51 | 57 | crash are now trapped and reported, and the functions that |
| ... | ... | @@ -80,6 +86,10 @@ For a detailed list of changes, please see the file |
| 80 | 86 | |
| 81 | 87 | - Add ``qpdf_oh_get_type_code`` and ``qpdf_oh_get_type_name``. |
| 82 | 88 | |
| 89 | + - Add ``qpdf_oh_get_binary_string_value`` and | |
| 90 | + ``qpdf_oh_new_binary_string`` for making it easier to deal with | |
| 91 | + strings that contain embedded null characters. | |
| 92 | + | |
| 83 | 93 | 10.4.0: November 16, 2021 |
| 84 | 94 | - Handling of Weak Cryptography Algorithms |
| 85 | 95 | ... | ... |
qpdf/qpdf-ctest.c
| ... | ... | @@ -781,8 +781,17 @@ static void test27(char const* infile, |
| 781 | 781 | assert(strcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null), |
| 782 | 782 | "one") == 0); |
| 783 | 783 | assert(qpdf_get_last_string_length(qpdf) == 7); |
| 784 | + /* memcmp adds a character to verify the trailing null */ | |
| 784 | 785 | assert(memcmp(qpdf_oh_get_string_value(qpdf, p_string_with_null), |
| 785 | - "one\000two", 7) == 0); | |
| 786 | + "one\000two", 8) == 0); | |
| 787 | + size_t length = 0; | |
| 788 | + p_string_with_null = qpdf_oh_new_binary_string(qpdf, "potato\000salad", 12); | |
| 789 | + /* memcmp adds a character to verify the trailing null */ | |
| 790 | + assert(memcmp(qpdf_oh_get_binary_string_value( | |
| 791 | + qpdf, p_string_with_null, &length), | |
| 792 | + "potato\000salad", 13) == 0); | |
| 793 | + assert(qpdf_get_last_string_length(qpdf) == 12); | |
| 794 | + assert(length == 12); | |
| 786 | 795 | } |
| 787 | 796 | |
| 788 | 797 | static void test28(char const* infile, | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -625,3 +625,5 @@ qpdf-c stream data buf set 1 |
| 625 | 625 | qpdf-c called qpdf_oh_get_page_content_data 0 |
| 626 | 626 | qpdf-c called qpdf_oh_replace_stream_data 0 |
| 627 | 627 | qpdf-c silence oh errors 0 |
| 628 | +qpdf-c called qpdf_oh_get_binary_string_value 0 | |
| 629 | +qpdf-c called qpdf_oh_new_binary_string 0 | ... | ... |