Commit 3ef1b77304ec49ec2527d8cc3e17e1d0dd220720

Authored by Jay Berkenbilt
1 parent 089ce590

Refactor QUtil::utf8_to_ascii

libqpdf/QUtil.cc
... ... @@ -893,20 +893,32 @@ QUtil::parse_numrange(char const* range, int max)
893 893 return result;
894 894 }
895 895  
896   -enum encoding_e { e_utf16 };
  896 +enum encoding_e { e_utf16, e_ascii };
897 897  
898 898 static
899 899 std::string
900   -transcode_utf8(std::string const& utf8_val, encoding_e encoding)
  900 +transcode_utf8(std::string const& utf8_val, encoding_e encoding,
  901 + char unknown)
901 902 {
902   - std::string result = "\xfe\xff";
  903 + std::string result;
  904 + if (encoding == e_utf16)
  905 + {
  906 + result += "\xfe\xff";
  907 + }
903 908 size_t len = utf8_val.length();
904 909 for (size_t i = 0; i < len; ++i)
905 910 {
906 911 unsigned char ch = static_cast<unsigned char>(utf8_val.at(i));
907 912 if (ch < 128)
908 913 {
909   - result += QUtil::toUTF16(ch);
  914 + if (encoding == e_utf16)
  915 + {
  916 + result += QUtil::toUTF16(ch);
  917 + }
  918 + else
  919 + {
  920 + result.append(1, ch);
  921 + }
910 922 }
911 923 else
912 924 {
... ... @@ -923,7 +935,14 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding)
923 935 if (((bytes_needed > 5) || (bytes_needed < 1)) ||
924 936 ((i + bytes_needed) >= len))
925 937 {
926   - result += "\xff\xfd";
  938 + if (encoding == e_utf16)
  939 + {
  940 + result += "\xff\xfd";
  941 + }
  942 + else
  943 + {
  944 + result.append(1, unknown);
  945 + }
927 946 }
928 947 else
929 948 {
... ... @@ -941,7 +960,14 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding)
941 960 codepoint <<= 6;
942 961 codepoint += (ch & 0x3f);
943 962 }
944   - result += QUtil::toUTF16(codepoint);
  963 + if (encoding == e_utf16)
  964 + {
  965 + result += QUtil::toUTF16(codepoint);
  966 + }
  967 + else
  968 + {
  969 + result.append(1, unknown);
  970 + }
945 971 }
946 972 }
947 973 }
... ... @@ -951,28 +977,11 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding)
951 977 std::string
952 978 QUtil::utf8_to_utf16(std::string const& utf8)
953 979 {
954   - return transcode_utf8(utf8, e_utf16);
  980 + return transcode_utf8(utf8, e_utf16, 0);
955 981 }
956 982  
957 983 std::string
958 984 QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char)
959 985 {
960   - std::string ascii_value;
961   - for (size_t i = 0; i < utf8.length(); ++i)
962   - {
963   - unsigned char ch = static_cast<unsigned char>(utf8.at(i));
964   - if (ch < 128)
965   - {
966   - ascii_value.append(1, ch);
967   - }
968   - else if ((ch & 0xc0) == 0x80)
969   - {
970   - // Ignore subsequent byte of UTF-8 encoded character
971   - }
972   - else
973   - {
974   - ascii_value.append(1, unknown_char);
975   - }
976   - }
977   - return ascii_value;
  986 + return transcode_utf8(utf8, e_ascii, unknown_char);
978 987 }
... ...
libtests/qtest/qutil/qutil.out
... ... @@ -48,9 +48,9 @@ HAGOOGAMAGOOGLE: 0
48 48 0x7fffffff -> ff fd
49 49 0x80000000 -> ff fd
50 50 ---- utf8_to_ascii
51   -Does π have fingers?
52   -Does ? have fingers?
53   -Does * have fingers?
  51 +¿Does π have fingers?
  52 +?Does ? have fingers?
  53 +*Does * have fingers?
54 54 ---- whoami
55 55 quack1
56 56 quack2
... ...
libtests/qutil.cc
... ... @@ -222,7 +222,7 @@ void to_utf16_test()
222 222  
223 223 void utf8_to_ascii_test()
224 224 {
225   - char const* input = "Does \317\200 have fingers?";
  225 + char const* input = "\302\277Does \317\200 have fingers?";
226 226 std::cout << input
227 227 << std::endl
228 228 << QUtil::utf8_to_ascii(input)
... ...