Commit 02281632ccbba3ef00a6968bfd697f4be836d0dd

Authored by Jay Berkenbilt
1 parent b55567a0

Add QUtil::utf8_to_ascii

ChangeLog
  1 +2019-01-03 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * Add method QUtil::utf8_to_ascii, which returns an ASCII string
  4 + for a UTF-8 string, replacing out-of-range characters with a
  5 + specified substitute.
  6 +
1 7 2019-01-02 Jay Berkenbilt <ejb@ql.org>
2 8  
3 9 * Add method QPDFObjectHandle::getResourceNames that returns a set
... ...
include/qpdf/QUtil.hh
... ... @@ -152,6 +152,12 @@ namespace QUtil
152 152 QPDF_DLL
153 153 std::string toUTF16(unsigned long uval);
154 154  
  155 + // Convert a UTF-8 encoded string to ASCII by replacing every
  156 + // character outside the ASCII range with the given unknown_char.
  157 + QPDF_DLL
  158 + std::string utf8_to_ascii(
  159 + std::string const& utf8, char unknown_char = '?');
  160 +
155 161 // If secure random number generation is supported on your
156 162 // platform and qpdf was not compiled with insecure random number
157 163 // generation, this returns a cryptographically secure random
... ...
libqpdf/QUtil.cc
... ... @@ -892,3 +892,26 @@ QUtil::parse_numrange(char const* range, int max)
892 892 }
893 893 return result;
894 894 }
  895 +
// Return the number of continuation bytes (10xxxxxx) announced by a
// UTF-8 lead byte, determined by counting the 1 bits that follow the
// top bit. Legacy 5- and 6-byte forms are counted as well so that
// anything shaped like a sequence collapses to one substitute.
// Returns 0 for bytes that cannot start a sequence (0xfe, 0xff).
static size_t
utf8_continuation_count(unsigned char lead)
{
    size_t count = 0;
    for (unsigned int mask = 0x40; (mask != 0) && (lead & mask); mask >>= 1)
    {
        ++count;
    }
    return (count <= 5) ? count : 0;
}

// Convert a UTF-8 encoded string to ASCII.
//
// utf8:         input bytes, expected to be UTF-8 encoded
// unknown_char: character emitted in place of anything that is not
//               ASCII
//
// Returns a string in which ASCII bytes are copied unchanged and
// each multi-byte sequence is replaced by a single unknown_char.
// Continuation bytes that do not belong to a sequence (malformed
// input) are replaced by unknown_char, one per byte, rather than
// being silently dropped.
std::string
QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char)
{
    std::string ascii_value;
    ascii_value.reserve(utf8.length());
    // Continuation bytes still owed to the most recent lead byte.
    size_t pending = 0;
    for (size_t i = 0; i < utf8.length(); ++i)
    {
        unsigned char ch = static_cast<unsigned char>(utf8[i]);
        if (ch < 128)
        {
            // Plain ASCII; it also terminates any truncated sequence.
            ascii_value.append(1, static_cast<char>(ch));
            pending = 0;
        }
        else if ((ch & 0xc0) == 0x80)
        {
            if (pending > 0)
            {
                // Part of a character already represented by the
                // substitute emitted for its lead byte.
                --pending;
            }
            else
            {
                // Orphan continuation byte: keep a visible marker.
                ascii_value.append(1, unknown_char);
            }
        }
        else
        {
            // Lead byte of a multi-byte sequence (or an invalid byte).
            ascii_value.append(1, unknown_char);
            pending = utf8_continuation_count(ch);
        }
    }
    return ascii_value;
}
... ...
libtests/qtest/qutil/qutil.out
... ... @@ -47,6 +47,10 @@ HAGOOGAMAGOOGLE: 0
47 47 0xdead -> ff fd
48 48 0x7fffffff -> ff fd
49 49 0x80000000 -> ff fd
  50 +---- utf8_to_ascii
  51 +Does π have fingers?
  52 +Does ? have fingers?
  53 +Does * have fingers?
50 54 ---- whoami
51 55 quack1
52 56 quack2
... ...
libtests/qutil.cc
... ... @@ -220,6 +220,17 @@ void to_utf16_test()
220 220 print_utf16(0x80000000UL);
221 221 }
222 222  
  223 +void utf8_to_ascii_test()
  224 +{
  225 + char const* input = "Does \317\200 have fingers?";
  226 + std::cout << input
  227 + << std::endl
  228 + << QUtil::utf8_to_ascii(input)
  229 + << std::endl
  230 + << QUtil::utf8_to_ascii(input, '*')
  231 + << std::endl;
  232 +}
  233 +
223 234 void print_whoami(char const* str)
224 235 {
225 236 PointerHolder<char> dup(true, QUtil::copy_string(str));
... ... @@ -328,6 +339,8 @@ int main(int argc, char* argv[])
328 339 to_utf8_test();
329 340 std::cout << "---- utf16" << std::endl;
330 341 to_utf16_test();
  342 + std::cout << "---- utf8_to_ascii" << std::endl;
  343 + utf8_to_ascii_test();
331 344 std::cout << "---- whoami" << std::endl;
332 345 get_whoami_test();
333 346 std::cout << "---- file" << std::endl;
... ...