Commit 02281632ccbba3ef00a6968bfd697f4be836d0dd
1 parent
b55567a0
Add QUtil::utf8_to_ascii
Showing
5 changed files
with
52 additions
and
0 deletions
ChangeLog
| 1 | +2019-01-03 Jay Berkenbilt <ejb@ql.org> | ||
| 2 | + | ||
| 3 | + * Add method QUtil::utf8_to_ascii, which returns an ASCII string | ||
| 4 | + for a UTF-8 string, replacing out-of-range characters with a | ||
| 5 | + specified substitute. | ||
| 6 | + | ||
| 1 | 2019-01-02 Jay Berkenbilt <ejb@ql.org> | 7 | 2019-01-02 Jay Berkenbilt <ejb@ql.org> |
| 2 | 8 | ||
| 3 | * Add method QPDFObjectHandle::getResourceNames that returns a set | 9 | * Add method QPDFObjectHandle::getResourceNames that returns a set |
include/qpdf/QUtil.hh
| @@ -152,6 +152,12 @@ namespace QUtil | @@ -152,6 +152,12 @@ namespace QUtil | ||
| 152 | QPDF_DLL | 152 | QPDF_DLL |
| 153 | std::string toUTF16(unsigned long uval); | 153 | std::string toUTF16(unsigned long uval); |
| 154 | 154 | ||
| 155 | + // Convert a UTF-8 encoded string to ASCII by replacing each | ||
| 156 | + // character outside the ASCII range with the given unknown_char. | ||
| 157 | + QPDF_DLL | ||
| 158 | + std::string utf8_to_ascii( | ||
| 159 | + std::string const& utf8, char unknown_char = '?'); | ||
| 160 | + | ||
| 155 | // If secure random number generation is supported on your | 161 | // If secure random number generation is supported on your |
| 156 | // platform and qpdf was not compiled with insecure random number | 162 | // platform and qpdf was not compiled with insecure random number |
| 157 | // generation, this returns a cryptographically secure random | 163 | // generation, this returns a cryptographically secure random |
libqpdf/QUtil.cc
| @@ -892,3 +892,26 @@ QUtil::parse_numrange(char const* range, int max) | @@ -892,3 +892,26 @@ QUtil::parse_numrange(char const* range, int max) | ||
| 892 | } | 892 | } |
| 893 | return result; | 893 | return result; |
| 894 | } | 894 | } |
| 895 | + | ||
// Convert a UTF-8 encoded string to ASCII.
//
// Bytes below 128 are copied through unchanged. Every multi-byte
// UTF-8 sequence is collapsed into a single unknown_char. Input is
// not required to be well-formed: a continuation byte (10xxxxxx)
// that does not belong to a preceding lead byte is also replaced by
// unknown_char rather than being dropped, so malformed input never
// loses data without leaving a marker in the output.
//
// utf8:         the UTF-8 encoded input
// unknown_char: the substitute written for each non-ASCII character
//
// Returns the converted string, which is never longer than the input.
std::string
QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char)
{
    std::string ascii_value;
    // Each input byte produces at most one output byte.
    ascii_value.reserve(utf8.length());

    // Number of continuation bytes still owed to the most recent
    // lead byte.
    int pending = 0;
    for (std::string::const_iterator iter = utf8.begin();
         iter != utf8.end(); ++iter)
    {
        unsigned char ch = static_cast<unsigned char>(*iter);
        if (ch < 128)
        {
            // An ASCII byte ends any sequence in progress, even a
            // truncated one.
            pending = 0;
            ascii_value.push_back(static_cast<char>(ch));
        }
        else if (((ch & 0xc0) == 0x80) && (pending > 0))
        {
            // Expected continuation byte: the character it belongs
            // to was already replaced when its lead byte was seen.
            --pending;
        }
        else
        {
            // Either a lead byte or an orphan continuation byte.
            // Both produce exactly one substitute.
            ascii_value.push_back(unknown_char);

            // The number of 1 bits following the top bit gives the
            // number of continuation bytes to expect. This is 0 for
            // an orphan continuation byte, and it also accepts the
            // extended (longer than 4 byte) forms.
            pending = 0;
            unsigned int mask = 0x40;
            while (ch & mask)
            {
                ++pending;
                mask >>= 1;
            }
        }
    }
    return ascii_value;
}
libtests/qtest/qutil/qutil.out
| @@ -47,6 +47,10 @@ HAGOOGAMAGOOGLE: 0 | @@ -47,6 +47,10 @@ HAGOOGAMAGOOGLE: 0 | ||
| 47 | 0xdead -> ff fd | 47 | 0xdead -> ff fd |
| 48 | 0x7fffffff -> ff fd | 48 | 0x7fffffff -> ff fd |
| 49 | 0x80000000 -> ff fd | 49 | 0x80000000 -> ff fd |
| 50 | +---- utf8_to_ascii | ||
| 51 | +Does π have fingers? | ||
| 52 | +Does ? have fingers? | ||
| 53 | +Does * have fingers? | ||
| 50 | ---- whoami | 54 | ---- whoami |
| 51 | quack1 | 55 | quack1 |
| 52 | quack2 | 56 | quack2 |
libtests/qutil.cc
| @@ -220,6 +220,17 @@ void to_utf16_test() | @@ -220,6 +220,17 @@ void to_utf16_test() | ||
| 220 | print_utf16(0x80000000UL); | 220 | print_utf16(0x80000000UL); |
| 221 | } | 221 | } |
| 222 | 222 | ||
// Print a sample UTF-8 string followed by its ASCII conversions:
// once with the default substitute character and once with '*'.
void utf8_to_ascii_test()
{
    char const* sample = "Does \317\200 have fingers?";
    std::string const with_default = QUtil::utf8_to_ascii(sample);
    std::string const with_star = QUtil::utf8_to_ascii(sample, '*');

    std::cout << sample << std::endl;
    std::cout << with_default << std::endl;
    std::cout << with_star << std::endl;
}
| 233 | + | ||
| 223 | void print_whoami(char const* str) | 234 | void print_whoami(char const* str) |
| 224 | { | 235 | { |
| 225 | PointerHolder<char> dup(true, QUtil::copy_string(str)); | 236 | PointerHolder<char> dup(true, QUtil::copy_string(str)); |
| @@ -328,6 +339,8 @@ int main(int argc, char* argv[]) | @@ -328,6 +339,8 @@ int main(int argc, char* argv[]) | ||
| 328 | to_utf8_test(); | 339 | to_utf8_test(); |
| 329 | std::cout << "---- utf16" << std::endl; | 340 | std::cout << "---- utf16" << std::endl; |
| 330 | to_utf16_test(); | 341 | to_utf16_test(); |
| 342 | + std::cout << "---- utf8_to_ascii" << std::endl; | ||
| 343 | + utf8_to_ascii_test(); | ||
| 331 | std::cout << "---- whoami" << std::endl; | 344 | std::cout << "---- whoami" << std::endl; |
| 332 | get_whoami_test(); | 345 | get_whoami_test(); |
| 333 | std::cout << "---- file" << std::endl; | 346 | std::cout << "---- file" << std::endl; |