Commit 02281632ccbba3ef00a6968bfd697f4be836d0dd
1 parent
b55567a0
Add QUtil::utf8_to_ascii
Showing
5 changed files
with
52 additions
and
0 deletions
ChangeLog
| 1 | +2019-01-03 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * Add method QUtil::utf8_to_ascii, which returns an ASCII string | |
| 4 | + for a UTF-8 string, replacing out-of-range characters with a | |
| 5 | + specified substitute. | |
| 6 | + | |
| 1 | 7 | 2019-01-02 Jay Berkenbilt <ejb@ql.org> |
| 2 | 8 | |
| 3 | 9 | * Add method QPDFObjectHandle::getResourceNames that returns a set | ... | ... |
include/qpdf/QUtil.hh
| ... | ... | @@ -152,6 +152,12 @@ namespace QUtil |
| 152 | 152 | QPDF_DLL |
| 153 | 153 | std::string toUTF16(unsigned long uval); |
| 154 | 154 | |
| 155 | + // Convert a UTF-8 encoded string to ASCII by replacing each | |
| 156 | + // character outside the ASCII range with the given unknown_char. | |
| 157 | + QPDF_DLL | |
| 158 | + std::string utf8_to_ascii( | |
| 159 | + std::string const& utf8, char unknown_char = '?'); | |
| 160 | + | |
| 155 | 161 | // If secure random number generation is supported on your |
| 156 | 162 | // platform and qpdf was not compiled with insecure random number |
| 157 | 163 | // generation, this returns a cryptographically secure random | ... | ... |
libqpdf/QUtil.cc
| ... | ... | @@ -892,3 +892,26 @@ QUtil::parse_numrange(char const* range, int max) |
| 892 | 892 | } |
| 893 | 893 | return result; |
| 894 | 894 | } |
| 895 | + | |
| 896 | +std::string | |
| 897 | +QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char) | |
| 898 | +{ | |
| 899 | + std::string ascii_value; | |
| 900 | + for (size_t i = 0; i < utf8.length(); ++i) | |
| 901 | + { | |
| 902 | + unsigned char ch = static_cast<unsigned char>(utf8.at(i)); | |
| 903 | + if (ch < 128) | |
| 904 | + { | |
| 905 | + ascii_value.append(1, ch); | |
| 906 | + } | |
| 907 | + else if ((ch & 0xc0) == 0x80) | |
| 908 | + { | |
| 909 | + // Ignore subsequent byte of UTF-8 encoded character | |
| 910 | + } | |
| 911 | + else | |
| 912 | + { | |
| 913 | + ascii_value.append(1, unknown_char); | |
| 914 | + } | |
| 915 | + } | |
| 916 | + return ascii_value; | |
| 917 | +} | ... | ... |
libtests/qtest/qutil/qutil.out
libtests/qutil.cc
| ... | ... | @@ -220,6 +220,17 @@ void to_utf16_test() |
| 220 | 220 | print_utf16(0x80000000UL); |
| 221 | 221 | } |
| 222 | 222 | |
| 223 | +void utf8_to_ascii_test() | |
| 224 | +{ | |
| 225 | + char const* input = "Does \317\200 have fingers?"; | |
| 226 | + std::cout << input | |
| 227 | + << std::endl | |
| 228 | + << QUtil::utf8_to_ascii(input) | |
| 229 | + << std::endl | |
| 230 | + << QUtil::utf8_to_ascii(input, '*') | |
| 231 | + << std::endl; | |
| 232 | +} | |
| 233 | + | |
| 223 | 234 | void print_whoami(char const* str) |
| 224 | 235 | { |
| 225 | 236 | PointerHolder<char> dup(true, QUtil::copy_string(str)); |
| ... | ... | @@ -328,6 +339,8 @@ int main(int argc, char* argv[]) |
| 328 | 339 | to_utf8_test(); |
| 329 | 340 | std::cout << "---- utf16" << std::endl; |
| 330 | 341 | to_utf16_test(); |
| 342 | + std::cout << "---- utf8_to_ascii" << std::endl; | |
| 343 | + utf8_to_ascii_test(); | |
| 331 | 344 | std::cout << "---- whoami" << std::endl; |
| 332 | 345 | get_whoami_test(); |
| 333 | 346 | std::cout << "---- file" << std::endl; | ... | ... |