Commit 3ef1b77304ec49ec2527d8cc3e17e1d0dd220720

Authored by Jay Berkenbilt
1 parent 089ce590

Refactor QUtil::utf8_to_ascii

libqpdf/QUtil.cc
@@ -893,20 +893,32 @@ QUtil::parse_numrange(char const* range, int max) @@ -893,20 +893,32 @@ QUtil::parse_numrange(char const* range, int max)
893 return result; 893 return result;
894 } 894 }
895 895
896 -enum encoding_e { e_utf16 }; 896 +enum encoding_e { e_utf16, e_ascii };
897 897
898 static 898 static
899 std::string 899 std::string
900 -transcode_utf8(std::string const& utf8_val, encoding_e encoding) 900 +transcode_utf8(std::string const& utf8_val, encoding_e encoding,
  901 + char unknown)
901 { 902 {
902 - std::string result = "\xfe\xff"; 903 + std::string result;
  904 + if (encoding == e_utf16)
  905 + {
  906 + result += "\xfe\xff";
  907 + }
903 size_t len = utf8_val.length(); 908 size_t len = utf8_val.length();
904 for (size_t i = 0; i < len; ++i) 909 for (size_t i = 0; i < len; ++i)
905 { 910 {
906 unsigned char ch = static_cast<unsigned char>(utf8_val.at(i)); 911 unsigned char ch = static_cast<unsigned char>(utf8_val.at(i));
907 if (ch < 128) 912 if (ch < 128)
908 { 913 {
909 - result += QUtil::toUTF16(ch); 914 + if (encoding == e_utf16)
  915 + {
  916 + result += QUtil::toUTF16(ch);
  917 + }
  918 + else
  919 + {
  920 + result.append(1, ch);
  921 + }
910 } 922 }
911 else 923 else
912 { 924 {
@@ -923,7 +935,14 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding) @@ -923,7 +935,14 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding)
923 if (((bytes_needed > 5) || (bytes_needed < 1)) || 935 if (((bytes_needed > 5) || (bytes_needed < 1)) ||
924 ((i + bytes_needed) >= len)) 936 ((i + bytes_needed) >= len))
925 { 937 {
926 - result += "\xff\xfd"; 938 + if (encoding == e_utf16)
  939 + {
  940 + result += "\xff\xfd";
  941 + }
  942 + else
  943 + {
  944 + result.append(1, unknown);
  945 + }
927 } 946 }
928 else 947 else
929 { 948 {
@@ -941,7 +960,14 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding) @@ -941,7 +960,14 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding)
941 codepoint <<= 6; 960 codepoint <<= 6;
942 codepoint += (ch & 0x3f); 961 codepoint += (ch & 0x3f);
943 } 962 }
944 - result += QUtil::toUTF16(codepoint); 963 + if (encoding == e_utf16)
  964 + {
  965 + result += QUtil::toUTF16(codepoint);
  966 + }
  967 + else
  968 + {
  969 + result.append(1, unknown);
  970 + }
945 } 971 }
946 } 972 }
947 } 973 }
@@ -951,28 +977,11 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding) @@ -951,28 +977,11 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding)
951 std::string 977 std::string
952 QUtil::utf8_to_utf16(std::string const& utf8) 978 QUtil::utf8_to_utf16(std::string const& utf8)
953 { 979 {
954 - return transcode_utf8(utf8, e_utf16); 980 + return transcode_utf8(utf8, e_utf16, 0);
955 } 981 }
956 982
957 std::string 983 std::string
958 QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char) 984 QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char)
959 { 985 {
960 - std::string ascii_value;  
961 - for (size_t i = 0; i < utf8.length(); ++i)  
962 - {  
963 - unsigned char ch = static_cast<unsigned char>(utf8.at(i));  
964 - if (ch < 128)  
965 - {  
966 - ascii_value.append(1, ch);  
967 - }  
968 - else if ((ch & 0xc0) == 0x80)  
969 - {  
970 - // Ignore subsequent byte of UTF-8 encoded character  
971 - }  
972 - else  
973 - {  
974 - ascii_value.append(1, unknown_char);  
975 - }  
976 - }  
977 - return ascii_value; 986 + return transcode_utf8(utf8, e_ascii, unknown_char);
978 } 987 }
libtests/qtest/qutil/qutil.out
@@ -48,9 +48,9 @@ HAGOOGAMAGOOGLE: 0 @@ -48,9 +48,9 @@ HAGOOGAMAGOOGLE: 0
48 0x7fffffff -> ff fd 48 0x7fffffff -> ff fd
49 0x80000000 -> ff fd 49 0x80000000 -> ff fd
50 ---- utf8_to_ascii 50 ---- utf8_to_ascii
51 -Does π have fingers?  
52 -Does ? have fingers?  
53 -Does * have fingers? 51 +¿Does π have fingers?
  52 +?Does ? have fingers?
  53 +*Does * have fingers?
54 ---- whoami 54 ---- whoami
55 quack1 55 quack1
56 quack2 56 quack2
libtests/qutil.cc
@@ -222,7 +222,7 @@ void to_utf16_test() @@ -222,7 +222,7 @@ void to_utf16_test()
222 222
223 void utf8_to_ascii_test() 223 void utf8_to_ascii_test()
224 { 224 {
225 - char const* input = "Does \317\200 have fingers?"; 225 + char const* input = "\302\277Does \317\200 have fingers?";
226 std::cout << input 226 std::cout << input
227 << std::endl 227 << std::endl
228 << QUtil::utf8_to_ascii(input) 228 << QUtil::utf8_to_ascii(input)