Commit 02281632ccbba3ef00a6968bfd697f4be836d0dd

Authored by Jay Berkenbilt
1 parent b55567a0

Add QUtil::utf8_to_ascii

ChangeLog
  1 +2019-01-03 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add method QUtil::utf8_to_ascii, which returns an ASCII string
  4 + for a UTF-8 string, replacing out-of-range characters with a
  5 + specified substitute.
  6 +
1 2019-01-02 Jay Berkenbilt <ejb@ql.org> 7 2019-01-02 Jay Berkenbilt <ejb@ql.org>
2 8
3 * Add method QPDFObjectHandle::getResourceNames that returns a set 9 * Add method QPDFObjectHandle::getResourceNames that returns a set
include/qpdf/QUtil.hh
@@ -152,6 +152,12 @@ namespace QUtil @@ -152,6 +152,12 @@ namespace QUtil
152 QPDF_DLL 152 QPDF_DLL
153 std::string toUTF16(unsigned long uval); 153 std::string toUTF16(unsigned long uval);
154 154
  155 + // Convert a UTF-8 encoded string to ASCII by replacing all
  156 + // characters outside of ASCII with the given unknown_char.
  157 + QPDF_DLL
  158 + std::string utf8_to_ascii(
  159 + std::string const& utf8, char unknown_char = '?');
  160 +
155 // If secure random number generation is supported on your 161 // If secure random number generation is supported on your
156 // platform and qpdf was not compiled with insecure random number 162 // platform and qpdf was not compiled with insecure random number
157 // generation, this returns a cryptographically secure random 163 // generation, this returns a cryptographically secure random
libqpdf/QUtil.cc
@@ -892,3 +892,26 @@ QUtil::parse_numrange(char const* range, int max) @@ -892,3 +892,26 @@ QUtil::parse_numrange(char const* range, int max)
892 } 892 }
893 return result; 893 return result;
894 } 894 }
  895 +
  896 +std::string
  897 +QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char)
  898 +{
  899 + std::string ascii_value;
  900 + for (size_t i = 0; i < utf8.length(); ++i)
  901 + {
  902 + unsigned char ch = static_cast<unsigned char>(utf8.at(i));
  903 + if (ch < 128)
  904 + {
  905 + ascii_value.append(1, ch);
  906 + }
  907 + else if ((ch & 0xc0) == 0x80)
  908 + {
  909 + // Ignore continuation byte of a multi-byte UTF-8 encoded character
  910 + }
  911 + else
  912 + {
  913 + ascii_value.append(1, unknown_char);
  914 + }
  915 + }
  916 + return ascii_value;
  917 +}
libtests/qtest/qutil/qutil.out
@@ -47,6 +47,10 @@ HAGOOGAMAGOOGLE: 0 @@ -47,6 +47,10 @@ HAGOOGAMAGOOGLE: 0
47 0xdead -> ff fd 47 0xdead -> ff fd
48 0x7fffffff -> ff fd 48 0x7fffffff -> ff fd
49 0x80000000 -> ff fd 49 0x80000000 -> ff fd
  50 +---- utf8_to_ascii
  51 +Does π have fingers?
  52 +Does ? have fingers?
  53 +Does * have fingers?
50 ---- whoami 54 ---- whoami
51 quack1 55 quack1
52 quack2 56 quack2
libtests/qutil.cc
@@ -220,6 +220,17 @@ void to_utf16_test() @@ -220,6 +220,17 @@ void to_utf16_test()
220 print_utf16(0x80000000UL); 220 print_utf16(0x80000000UL);
221 } 221 }
222 222
  223 +void utf8_to_ascii_test()
  224 +{
  225 + char const* input = "Does \317\200 have fingers?";
  226 + std::cout << input
  227 + << std::endl
  228 + << QUtil::utf8_to_ascii(input)
  229 + << std::endl
  230 + << QUtil::utf8_to_ascii(input, '*')
  231 + << std::endl;
  232 +}
  233 +
223 void print_whoami(char const* str) 234 void print_whoami(char const* str)
224 { 235 {
225 PointerHolder<char> dup(true, QUtil::copy_string(str)); 236 PointerHolder<char> dup(true, QUtil::copy_string(str));
@@ -328,6 +339,8 @@ int main(int argc, char* argv[]) @@ -328,6 +339,8 @@ int main(int argc, char* argv[])
328 to_utf8_test(); 339 to_utf8_test();
329 std::cout << "---- utf16" << std::endl; 340 std::cout << "---- utf16" << std::endl;
330 to_utf16_test(); 341 to_utf16_test();
  342 + std::cout << "---- utf8_to_ascii" << std::endl;
  343 + utf8_to_ascii_test();
331 std::cout << "---- whoami" << std::endl; 344 std::cout << "---- whoami" << std::endl;
332 get_whoami_test(); 345 get_whoami_test();
333 std::cout << "---- file" << std::endl; 346 std::cout << "---- file" << std::endl;