Commit 6817ca585a44eec2c50961ba7c714b419774da36

Authored by Jay Berkenbilt
1 parent 69848546

Bidirectional transcoding for win, mac, pdf, utf8, utf16

libqpdf/QUtil.cc
@@ -66,6 +66,171 @@ static unsigned short pdf_doc_to_unicode[] = { @@ -66,6 +66,171 @@ static unsigned short pdf_doc_to_unicode[] = {
66 0xfffd, // 0x9f UNDEFINED 66 0xfffd, // 0x9f UNDEFINED
67 0x20ac, // 0xa0 EURO SIGN 67 0x20ac, // 0xa0 EURO SIGN
68 }; 68 };
  69 +static unsigned short win_ansi_to_unicode[] = {
  70 + 0x20ac, // 0x80
  71 + 0xfffd, // 0x81
  72 + 0x201a, // 0x82
  73 + 0x0192, // 0x83
  74 + 0x201e, // 0x84
  75 + 0x2026, // 0x85
  76 + 0x2020, // 0x86
  77 + 0x2021, // 0x87
  78 + 0x02c6, // 0x88
  79 + 0x2030, // 0x89
  80 + 0x0160, // 0x8a
  81 + 0x2039, // 0x8b
  82 + 0x0152, // 0x8c
  83 + 0xfffd, // 0x8d
  84 + 0x017d, // 0x8e
  85 + 0xfffd, // 0x8f
  86 + 0xfffd, // 0x90
  87 + 0x2018, // 0x91
  88 + 0x2019, // 0x92
  89 + 0x201c, // 0x93
  90 + 0x201d, // 0x94
  91 + 0x2022, // 0x95
  92 + 0x2013, // 0x96
  93 + 0x2014, // 0x97
  94 + 0x0303, // 0x98
  95 + 0x2122, // 0x99
  96 + 0x0161, // 0x9a
  97 + 0x203a, // 0x9b
  98 + 0x0153, // 0x9c
  99 + 0xfffd, // 0x9d
  100 + 0x017e, // 0x9e
  101 + 0x0178, // 0x9f
  102 + 0x00a0, // 0xa0
  103 +};
  104 +static unsigned short mac_roman_to_unicode[] = {
  105 + 0x00c4, // 0x80
  106 + 0x00c5, // 0x81
  107 + 0x00c7, // 0x82
  108 + 0x00c9, // 0x83
  109 + 0x00d1, // 0x84
  110 + 0x00d6, // 0x85
  111 + 0x00dc, // 0x86
  112 + 0x00e1, // 0x87
  113 + 0x00e0, // 0x88
  114 + 0x00e2, // 0x89
  115 + 0x00e4, // 0x8a
  116 + 0x00e3, // 0x8b
  117 + 0x00e5, // 0x8c
  118 + 0x00e7, // 0x8d
  119 + 0x00e9, // 0x8e
  120 + 0x00e8, // 0x8f
  121 + 0x00ea, // 0x90
  122 + 0x00eb, // 0x91
  123 + 0x00ed, // 0x92
  124 + 0x00ec, // 0x93
  125 + 0x00ee, // 0x94
  126 + 0x00ef, // 0x95
  127 + 0x00f1, // 0x96
  128 + 0x00f3, // 0x97
  129 + 0x00f2, // 0x98
  130 + 0x00f4, // 0x99
  131 + 0x00f6, // 0x9a
  132 + 0x00f5, // 0x9b
  133 + 0x00fa, // 0x9c
  134 + 0x00f9, // 0x9d
  135 + 0x00fb, // 0x9e
  136 + 0x00fc, // 0x9f
  137 + 0x2020, // 0xa0
  138 + 0x00b0, // 0xa1
  139 + 0x00a2, // 0xa2
  140 + 0x00a3, // 0xa3
  141 + 0x00a7, // 0xa4
  142 + 0x2022, // 0xa5
  143 + 0x00b6, // 0xa6
  144 + 0x00df, // 0xa7
  145 + 0x00ae, // 0xa8
  146 + 0x00a9, // 0xa9
  147 + 0x2122, // 0xaa
  148 + 0x0301, // 0xab
  149 + 0x0308, // 0xac
  150 + 0xfffd, // 0xad
  151 + 0x00c6, // 0xae
  152 + 0x00d8, // 0xaf
  153 + 0xfffd, // 0xb0
  154 + 0x00b1, // 0xb1
  155 + 0xfffd, // 0xb2
  156 + 0xfffd, // 0xb3
  157 + 0x00a5, // 0xb4
  158 + 0x03bc, // 0xb5
  159 + 0xfffd, // 0xb6
  160 + 0xfffd, // 0xb7
  161 + 0xfffd, // 0xb8
  162 + 0xfffd, // 0xb9
  163 + 0xfffd, // 0xba
  164 + 0x1d43, // 0xbb
  165 + 0x1d52, // 0xbc
  166 + 0xfffd, // 0xbd
  167 + 0x00e6, // 0xbe
  168 + 0x00f8, // 0xbf
  169 + 0x00bf, // 0xc0
  170 + 0x00a1, // 0xc1
  171 + 0x00ac, // 0xc2
  172 + 0xfffd, // 0xc3
  173 + 0x0192, // 0xc4
  174 + 0xfffd, // 0xc5
  175 + 0xfffd, // 0xc6
  176 + 0x00ab, // 0xc7
  177 + 0x00bb, // 0xc8
  178 + 0x2026, // 0xc9
  179 + 0xfffd, // 0xca
  180 + 0x00c0, // 0xcb
  181 + 0x00c3, // 0xcc
  182 + 0x00d5, // 0xcd
  183 + 0x0152, // 0xce
  184 + 0x0153, // 0xcf
  185 + 0x2013, // 0xd0
  186 + 0x2014, // 0xd1
  187 + 0x201c, // 0xd2
  188 + 0x201d, // 0xd3
  189 + 0x2018, // 0xd4
  190 + 0x2019, // 0xd5
  191 + 0x00f7, // 0xd6
  192 + 0xfffd, // 0xd7
  193 + 0x00ff, // 0xd8
  194 + 0x0178, // 0xd9
  195 + 0x2044, // 0xda
  196 + 0x00a4, // 0xdb
  197 + 0x2039, // 0xdc
  198 + 0x203a, // 0xdd
  199 + 0xfb01, // 0xde
  200 + 0xfb02, // 0xdf
  201 + 0x2021, // 0xe0
  202 + 0x00b7, // 0xe1
  203 + 0x201a, // 0xe2
  204 + 0x201e, // 0xe3
  205 + 0x2030, // 0xe4
  206 + 0x00c2, // 0xe5
  207 + 0x00ca, // 0xe6
  208 + 0x00c1, // 0xe7
  209 + 0x00cb, // 0xe8
  210 + 0x00c8, // 0xe9
  211 + 0x00cd, // 0xea
  212 + 0x00ce, // 0xeb
  213 + 0x00cf, // 0xec
  214 + 0x00cc, // 0xed
  215 + 0x00d3, // 0xee
  216 + 0x00d4, // 0xef
  217 + 0xfffd, // 0xf0
  218 + 0x00d2, // 0xf1
  219 + 0x00da, // 0xf2
  220 + 0x00db, // 0xf3
  221 + 0x00d9, // 0xf4
  222 + 0x0131, // 0xf5
  223 + 0x02c6, // 0xf6
  224 + 0x0303, // 0xf7
  225 + 0x0304, // 0xf8
  226 + 0x0306, // 0xf9
  227 + 0x0307, // 0xfa
  228 + 0x030a, // 0xfb
  229 + 0x0327, // 0xfc
  230 + 0x030b, // 0xfd
  231 + 0x0328, // 0xfe
  232 + 0x02c7, // 0xff
  233 +};
69 234
70 std::string 235 std::string
71 QUtil::int_to_string(long long num, int length) 236 QUtil::int_to_string(long long num, int length)
@@ -946,23 +1111,17 @@ encode_winansi(unsigned long codepoint) @@ -946,23 +1111,17 @@ encode_winansi(unsigned long codepoint)
946 case 0x20ac: 1111 case 0x20ac:
947 ch = 0x80; 1112 ch = 0x80;
948 break; 1113 break;
949 - case 0x152:  
950 - ch = 0x8c;  
951 - break;  
952 - case 0x160:  
953 - ch = 0x8a;  
954 - break;  
955 - case 0x178:  
956 - ch = 0x9f; 1114 + case 0x201a:
  1115 + ch = 0x82;
957 break; 1116 break;
958 - case 0x17d:  
959 - ch = 0x8e; 1117 + case 0x192:
  1118 + ch = 0x83;
960 break; 1119 break;
961 - case 0x2022:  
962 - ch = 0x95; 1120 + case 0x201e:
  1121 + ch = 0x84;
963 break; 1122 break;
964 - case 0x2c6:  
965 - ch = 0x88; 1123 + case 0x2026:
  1124 + ch = 0x85;
966 break; 1125 break;
967 case 0x2020: 1126 case 0x2020:
968 ch = 0x86; 1127 ch = 0x86;
@@ -970,32 +1129,29 @@ encode_winansi(unsigned long codepoint) @@ -970,32 +1129,29 @@ encode_winansi(unsigned long codepoint)
970 case 0x2021: 1129 case 0x2021:
971 ch = 0x87; 1130 ch = 0x87;
972 break; 1131 break;
973 - case 0x2026:  
974 - ch = 0x85;  
975 - break;  
976 - case 0x2014:  
977 - ch = 0x97; 1132 + case 0x2c6:
  1133 + ch = 0x88;
978 break; 1134 break;
979 - case 0x2013:  
980 - ch = 0x96; 1135 + case 0x2030:
  1136 + ch = 0x89;
981 break; 1137 break;
982 - case 0x192:  
983 - ch = 0x83; 1138 + case 0x160:
  1139 + ch = 0x8a;
984 break; 1140 break;
985 case 0x2039: 1141 case 0x2039:
986 ch = 0x8b; 1142 ch = 0x8b;
987 break; 1143 break;
988 - case 0x203a:  
989 - ch = 0x9b; 1144 + case 0x152:
  1145 + ch = 0x8c;
990 break; 1146 break;
991 - case 0x153:  
992 - ch = 0x9c; 1147 + case 0x17d:
  1148 + ch = 0x8e;
993 break; 1149 break;
994 - case 0x2030:  
995 - ch = 0x89; 1150 + case 0x2018:
  1151 + ch = 0x91;
996 break; 1152 break;
997 - case 0x201e:  
998 - ch = 0x84; 1153 + case 0x2019:
  1154 + ch = 0x92;
999 break; 1155 break;
1000 case 0x201c: 1156 case 0x201c:
1001 ch = 0x93; 1157 ch = 0x93;
@@ -1003,17 +1159,14 @@ encode_winansi(unsigned long codepoint) @@ -1003,17 +1159,14 @@ encode_winansi(unsigned long codepoint)
1003 case 0x201d: 1159 case 0x201d:
1004 ch = 0x94; 1160 ch = 0x94;
1005 break; 1161 break;
1006 - case 0x2018:  
1007 - ch = 0x91;  
1008 - break;  
1009 - case 0x2019:  
1010 - ch = 0x92; 1162 + case 0x2022:
  1163 + ch = 0x95;
1011 break; 1164 break;
1012 - case 0x201a:  
1013 - ch = 0x82; 1165 + case 0x2013:
  1166 + ch = 0x96;
1014 break; 1167 break;
1015 - case 0x161:  
1016 - ch = 0x9a; 1168 + case 0x2014:
  1169 + ch = 0x97;
1017 break; 1170 break;
1018 case 0x303: 1171 case 0x303:
1019 ch = 0x98; 1172 ch = 0x98;
@@ -1021,9 +1174,24 @@ encode_winansi(unsigned long codepoint) @@ -1021,9 +1174,24 @@ encode_winansi(unsigned long codepoint)
1021 case 0x2122: 1174 case 0x2122:
1022 ch = 0x99; 1175 ch = 0x99;
1023 break; 1176 break;
  1177 + case 0x161:
  1178 + ch = 0x9a;
  1179 + break;
  1180 + case 0x203a:
  1181 + ch = 0x9b;
  1182 + break;
  1183 + case 0x153:
  1184 + ch = 0x9c;
  1185 + break;
1024 case 0x17e: 1186 case 0x17e:
1025 ch = 0x9e; 1187 ch = 0x9e;
1026 break; 1188 break;
  1189 + case 0x178:
  1190 + ch = 0x9f;
  1191 + break;
  1192 + case 0xa0:
  1193 + ch = 0xa0;
  1194 + break;
1027 default: 1195 default:
1028 break; 1196 break;
1029 } 1197 }
@@ -1038,290 +1206,209 @@ encode_macroman(unsigned long codepoint) @@ -1038,290 +1206,209 @@ encode_macroman(unsigned long codepoint)
1038 unsigned char ch = '\0'; 1206 unsigned char ch = '\0';
1039 switch (codepoint) 1207 switch (codepoint)
1040 { 1208 {
1041 - case 0xc6:  
1042 - ch = 0xae;  
1043 - break;  
1044 - case 0xc1:  
1045 - ch = 0xe7;  
1046 - break;  
1047 - case 0xc2:  
1048 - ch = 0xe5;  
1049 - break;  
1050 case 0xc4: 1209 case 0xc4:
1051 ch = 0x80; 1210 ch = 0x80;
1052 break; 1211 break;
1053 - case 0xc0:  
1054 - ch = 0xcb;  
1055 - break;  
1056 case 0xc5: 1212 case 0xc5:
1057 ch = 0x81; 1213 ch = 0x81;
1058 break; 1214 break;
1059 - case 0xc3:  
1060 - ch = 0xcc;  
1061 - break;  
1062 case 0xc7: 1215 case 0xc7:
1063 ch = 0x82; 1216 ch = 0x82;
1064 break; 1217 break;
1065 case 0xc9: 1218 case 0xc9:
1066 ch = 0x83; 1219 ch = 0x83;
1067 break; 1220 break;
1068 - case 0xca:  
1069 - ch = 0xe6;  
1070 - break;  
1071 - case 0xcb:  
1072 - ch = 0xe8;  
1073 - break;  
1074 - case 0xc8:  
1075 - ch = 0xe9;  
1076 - break;  
1077 - case 0xcd:  
1078 - ch = 0xea;  
1079 - break;  
1080 - case 0xce:  
1081 - ch = 0xeb;  
1082 - break;  
1083 - case 0xcf:  
1084 - ch = 0xec;  
1085 - break;  
1086 - case 0xcc:  
1087 - ch = 0xed;  
1088 - break;  
1089 case 0xd1: 1221 case 0xd1:
1090 ch = 0x84; 1222 ch = 0x84;
1091 break; 1223 break;
1092 - case 0x152:  
1093 - ch = 0xce;  
1094 - break;  
1095 - case 0xd3:  
1096 - ch = 0xee;  
1097 - break;  
1098 - case 0xd4:  
1099 - ch = 0xef;  
1100 - break;  
1101 case 0xd6: 1224 case 0xd6:
1102 ch = 0x85; 1225 ch = 0x85;
1103 break; 1226 break;
1104 - case 0xd2:  
1105 - ch = 0xf1;  
1106 - break;  
1107 - case 0xd8:  
1108 - ch = 0xaf;  
1109 - break;  
1110 - case 0xd5:  
1111 - ch = 0xcd;  
1112 - break;  
1113 - case 0xda:  
1114 - ch = 0xf2;  
1115 - break;  
1116 - case 0xdb:  
1117 - ch = 0xf3;  
1118 - break;  
1119 case 0xdc: 1227 case 0xdc:
1120 ch = 0x86; 1228 ch = 0x86;
1121 break; 1229 break;
1122 - case 0xd9:  
1123 - ch = 0xf4;  
1124 - break;  
1125 - case 0x178:  
1126 - ch = 0xd9;  
1127 - break;  
1128 case 0xe1: 1230 case 0xe1:
1129 ch = 0x87; 1231 ch = 0x87;
1130 break; 1232 break;
  1233 + case 0xe0:
  1234 + ch = 0x88;
  1235 + break;
1131 case 0xe2: 1236 case 0xe2:
1132 ch = 0x89; 1237 ch = 0x89;
1133 break; 1238 break;
1134 - case 0x301:  
1135 - ch = 0xab;  
1136 - break;  
1137 case 0xe4: 1239 case 0xe4:
1138 ch = 0x8a; 1240 ch = 0x8a;
1139 break; 1241 break;
1140 - case 0xe6:  
1141 - ch = 0xbe;  
1142 - break;  
1143 - case 0xe0:  
1144 - ch = 0x88;  
1145 - break;  
1146 - case 0xe5:  
1147 - ch = 0x8c;  
1148 - break;  
1149 case 0xe3: 1242 case 0xe3:
1150 ch = 0x8b; 1243 ch = 0x8b;
1151 break; 1244 break;
1152 - case 0x306:  
1153 - ch = 0xf9;  
1154 - break;  
1155 - case 0x2022:  
1156 - ch = 0xa5;  
1157 - break;  
1158 - case 0x2c7:  
1159 - ch = 0xff; 1245 + case 0xe5:
  1246 + ch = 0x8c;
1160 break; 1247 break;
1161 case 0xe7: 1248 case 0xe7:
1162 ch = 0x8d; 1249 ch = 0x8d;
1163 break; 1250 break;
1164 - case 0x327:  
1165 - ch = 0xfc; 1251 + case 0xe9:
  1252 + ch = 0x8e;
1166 break; 1253 break;
1167 - case 0xa2:  
1168 - ch = 0xa2; 1254 + case 0xe8:
  1255 + ch = 0x8f;
1169 break; 1256 break;
1170 - case 0x2c6:  
1171 - ch = 0xf6; 1257 + case 0xea:
  1258 + ch = 0x90;
1172 break; 1259 break;
1173 - case 0xa9:  
1174 - ch = 0xa9; 1260 + case 0xeb:
  1261 + ch = 0x91;
1175 break; 1262 break;
1176 - case 0xa4:  
1177 - ch = 0xdb; 1263 + case 0xed:
  1264 + ch = 0x92;
1178 break; 1265 break;
1179 - case 0x2020:  
1180 - ch = 0xa0; 1266 + case 0xec:
  1267 + ch = 0x93;
1181 break; 1268 break;
1182 - case 0x2021:  
1183 - ch = 0xe0; 1269 + case 0xee:
  1270 + ch = 0x94;
1184 break; 1271 break;
1185 - case 0xb0:  
1186 - ch = 0xa1; 1272 + case 0xef:
  1273 + ch = 0x95;
1187 break; 1274 break;
1188 - case 0x308:  
1189 - ch = 0xac; 1275 + case 0xf1:
  1276 + ch = 0x96;
1190 break; 1277 break;
1191 - case 0xf7:  
1192 - ch = 0xd6; 1278 + case 0xf3:
  1279 + ch = 0x97;
1193 break; 1280 break;
1194 - case 0x307:  
1195 - ch = 0xfa; 1281 + case 0xf2:
  1282 + ch = 0x98;
1196 break; 1283 break;
1197 - case 0x131:  
1198 - ch = 0xf5; 1284 + case 0xf4:
  1285 + ch = 0x99;
1199 break; 1286 break;
1200 - case 0xe9:  
1201 - ch = 0x8e; 1287 + case 0xf6:
  1288 + ch = 0x9a;
1202 break; 1289 break;
1203 - case 0xea:  
1204 - ch = 0x90; 1290 + case 0xf5:
  1291 + ch = 0x9b;
1205 break; 1292 break;
1206 - case 0xeb:  
1207 - ch = 0x91; 1293 + case 0xfa:
  1294 + ch = 0x9c;
1208 break; 1295 break;
1209 - case 0xe8:  
1210 - ch = 0x8f; 1296 + case 0xf9:
  1297 + ch = 0x9d;
1211 break; 1298 break;
1212 - case 0x2026:  
1213 - ch = 0xc9; 1299 + case 0xfb:
  1300 + ch = 0x9e;
1214 break; 1301 break;
1215 - case 0x2014:  
1216 - ch = 0xd1; 1302 + case 0xfc:
  1303 + ch = 0x9f;
1217 break; 1304 break;
1218 - case 0x2013:  
1219 - ch = 0xd0; 1305 + case 0x2020:
  1306 + ch = 0xa0;
1220 break; 1307 break;
1221 - case 0xa1:  
1222 - ch = 0xc1; 1308 + case 0xb0:
  1309 + ch = 0xa1;
1223 break; 1310 break;
1224 - case 0xfb01:  
1225 - ch = 0xde; 1311 + case 0xa2:
  1312 + ch = 0xa2;
1226 break; 1313 break;
1227 - case 0xfb02:  
1228 - ch = 0xdf; 1314 + case 0xa3:
  1315 + ch = 0xa3;
1229 break; 1316 break;
1230 - case 0x192:  
1231 - ch = 0xc4; 1317 + case 0xa7:
  1318 + ch = 0xa4;
1232 break; 1319 break;
1233 - case 0x2044:  
1234 - ch = 0xda; 1320 + case 0x2022:
  1321 + ch = 0xa5;
  1322 + break;
  1323 + case 0xb6:
  1324 + ch = 0xa6;
1235 break; 1325 break;
1236 case 0xdf: 1326 case 0xdf:
1237 ch = 0xa7; 1327 ch = 0xa7;
1238 break; 1328 break;
1239 - case 0xab:  
1240 - ch = 0xc7;  
1241 - break;  
1242 - case 0xbb:  
1243 - ch = 0xc8;  
1244 - break;  
1245 - case 0x2039:  
1246 - ch = 0xdc; 1329 + case 0xae:
  1330 + ch = 0xa8;
1247 break; 1331 break;
1248 - case 0x203a:  
1249 - ch = 0xdd; 1332 + case 0xa9:
  1333 + ch = 0xa9;
1250 break; 1334 break;
1251 - case 0x30b:  
1252 - ch = 0xfd; 1335 + case 0x2122:
  1336 + ch = 0xaa;
1253 break; 1337 break;
1254 - case 0xed:  
1255 - ch = 0x92; 1338 + case 0x301:
  1339 + ch = 0xab;
1256 break; 1340 break;
1257 - case 0xee:  
1258 - ch = 0x94; 1341 + case 0x308:
  1342 + ch = 0xac;
1259 break; 1343 break;
1260 - case 0xef:  
1261 - ch = 0x95; 1344 + case 0xc6:
  1345 + ch = 0xae;
1262 break; 1346 break;
1263 - case 0xec:  
1264 - ch = 0x93; 1347 + case 0xd8:
  1348 + ch = 0xaf;
1265 break; 1349 break;
1266 - case 0xac:  
1267 - ch = 0xc2; 1350 + case 0xb1:
  1351 + ch = 0xb1;
1268 break; 1352 break;
1269 - case 0x304:  
1270 - ch = 0xf8; 1353 + case 0xa5:
  1354 + ch = 0xb4;
1271 break; 1355 break;
1272 case 0x3bc: 1356 case 0x3bc:
1273 ch = 0xb5; 1357 ch = 0xb5;
1274 break; 1358 break;
1275 - case 0xf1:  
1276 - ch = 0x96; 1359 + case 0x1d43:
  1360 + ch = 0xbb;
1277 break; 1361 break;
1278 - case 0xf3:  
1279 - ch = 0x97; 1362 + case 0x1d52:
  1363 + ch = 0xbc;
1280 break; 1364 break;
1281 - case 0xf4:  
1282 - ch = 0x99; 1365 + case 0xe6:
  1366 + ch = 0xbe;
1283 break; 1367 break;
1284 - case 0xf6:  
1285 - ch = 0x9a; 1368 + case 0xf8:
  1369 + ch = 0xbf;
1286 break; 1370 break;
1287 - case 0x153:  
1288 - ch = 0xcf; 1371 + case 0xbf:
  1372 + ch = 0xc0;
1289 break; 1373 break;
1290 - case 0x328:  
1291 - ch = 0xfe; 1374 + case 0xa1:
  1375 + ch = 0xc1;
1292 break; 1376 break;
1293 - case 0xf2:  
1294 - ch = 0x98; 1377 + case 0xac:
  1378 + ch = 0xc2;
1295 break; 1379 break;
1296 - case 0x1d43:  
1297 - ch = 0xbb; 1380 + case 0x192:
  1381 + ch = 0xc4;
1298 break; 1382 break;
1299 - case 0x1d52:  
1300 - ch = 0xbc; 1383 + case 0xab:
  1384 + ch = 0xc7;
1301 break; 1385 break;
1302 - case 0xf8:  
1303 - ch = 0xbf; 1386 + case 0xbb:
  1387 + ch = 0xc8;
1304 break; 1388 break;
1305 - case 0xf5:  
1306 - ch = 0x9b; 1389 + case 0x2026:
  1390 + ch = 0xc9;
1307 break; 1391 break;
1308 - case 0xb6:  
1309 - ch = 0xa6; 1392 + case 0xc0:
  1393 + ch = 0xcb;
1310 break; 1394 break;
1311 - case 0xb7:  
1312 - ch = 0xe1; 1395 + case 0xc3:
  1396 + ch = 0xcc;
1313 break; 1397 break;
1314 - case 0x2030:  
1315 - ch = 0xe4; 1398 + case 0xd5:
  1399 + ch = 0xcd;
1316 break; 1400 break;
1317 - case 0xb1:  
1318 - ch = 0xb1; 1401 + case 0x152:
  1402 + ch = 0xce;
1319 break; 1403 break;
1320 - case 0xbf:  
1321 - ch = 0xc0; 1404 + case 0x153:
  1405 + ch = 0xcf;
1322 break; 1406 break;
1323 - case 0x201e:  
1324 - ch = 0xe3; 1407 + case 0x2013:
  1408 + ch = 0xd0;
  1409 + break;
  1410 + case 0x2014:
  1411 + ch = 0xd1;
1325 break; 1412 break;
1326 case 0x201c: 1413 case 0x201c:
1327 ch = 0xd2; 1414 ch = 0xd2;
@@ -1335,44 +1422,125 @@ encode_macroman(unsigned long codepoint) @@ -1335,44 +1422,125 @@ encode_macroman(unsigned long codepoint)
1335 case 0x2019: 1422 case 0x2019:
1336 ch = 0xd5; 1423 ch = 0xd5;
1337 break; 1424 break;
  1425 + case 0xf7:
  1426 + ch = 0xd6;
  1427 + break;
  1428 + case 0xff:
  1429 + ch = 0xd8;
  1430 + break;
  1431 + case 0x178:
  1432 + ch = 0xd9;
  1433 + break;
  1434 + case 0x2044:
  1435 + ch = 0xda;
  1436 + break;
  1437 + case 0xa4:
  1438 + ch = 0xdb;
  1439 + break;
  1440 + case 0x2039:
  1441 + ch = 0xdc;
  1442 + break;
  1443 + case 0x203a:
  1444 + ch = 0xdd;
  1445 + break;
  1446 + case 0xfb01:
  1447 + ch = 0xde;
  1448 + break;
  1449 + case 0xfb02:
  1450 + ch = 0xdf;
  1451 + break;
  1452 + case 0x2021:
  1453 + ch = 0xe0;
  1454 + break;
  1455 + case 0xb7:
  1456 + ch = 0xe1;
  1457 + break;
1338 case 0x201a: 1458 case 0x201a:
1339 ch = 0xe2; 1459 ch = 0xe2;
1340 break; 1460 break;
1341 - case 0xae:  
1342 - ch = 0xa8; 1461 + case 0x201e:
  1462 + ch = 0xe3;
1343 break; 1463 break;
1344 - case 0x30a:  
1345 - ch = 0xfb; 1464 + case 0x2030:
  1465 + ch = 0xe4;
1346 break; 1466 break;
1347 - case 0xa7:  
1348 - ch = 0xa4; 1467 + case 0xc2:
  1468 + ch = 0xe5;
1349 break; 1469 break;
1350 - case 0xa3:  
1351 - ch = 0xa3; 1470 + case 0xca:
  1471 + ch = 0xe6;
  1472 + break;
  1473 + case 0xc1:
  1474 + ch = 0xe7;
  1475 + break;
  1476 + case 0xcb:
  1477 + ch = 0xe8;
  1478 + break;
  1479 + case 0xc8:
  1480 + ch = 0xe9;
  1481 + break;
  1482 + case 0xcd:
  1483 + ch = 0xea;
  1484 + break;
  1485 + case 0xce:
  1486 + ch = 0xeb;
  1487 + break;
  1488 + case 0xcf:
  1489 + ch = 0xec;
  1490 + break;
  1491 + case 0xcc:
  1492 + ch = 0xed;
  1493 + break;
  1494 + case 0xd3:
  1495 + ch = 0xee;
  1496 + break;
  1497 + case 0xd4:
  1498 + ch = 0xef;
  1499 + break;
  1500 + case 0xd2:
  1501 + ch = 0xf1;
  1502 + break;
  1503 + case 0xda:
  1504 + ch = 0xf2;
  1505 + break;
  1506 + case 0xdb:
  1507 + ch = 0xf3;
  1508 + break;
  1509 + case 0xd9:
  1510 + ch = 0xf4;
  1511 + break;
  1512 + case 0x131:
  1513 + ch = 0xf5;
  1514 + break;
  1515 + case 0x2c6:
  1516 + ch = 0xf6;
1352 break; 1517 break;
1353 case 0x303: 1518 case 0x303:
1354 ch = 0xf7; 1519 ch = 0xf7;
1355 break; 1520 break;
1356 - case 0x2122:  
1357 - ch = 0xaa; 1521 + case 0x304:
  1522 + ch = 0xf8;
1358 break; 1523 break;
1359 - case 0xfa:  
1360 - ch = 0x9c; 1524 + case 0x306:
  1525 + ch = 0xf9;
1361 break; 1526 break;
1362 - case 0xfb:  
1363 - ch = 0x9e; 1527 + case 0x307:
  1528 + ch = 0xfa;
1364 break; 1529 break;
1365 - case 0xfc:  
1366 - ch = 0x9f; 1530 + case 0x30a:
  1531 + ch = 0xfb;
1367 break; 1532 break;
1368 - case 0xf9:  
1369 - ch = 0x9d; 1533 + case 0x327:
  1534 + ch = 0xfc;
1370 break; 1535 break;
1371 - case 0xff:  
1372 - ch = 0xd8; 1536 + case 0x30b:
  1537 + ch = 0xfd;
1373 break; 1538 break;
1374 - case 0xa5:  
1375 - ch = 0xb4; 1539 + case 0x328:
  1540 + ch = 0xfe;
  1541 + break;
  1542 + case 0x2c7:
  1543 + ch = 0xff;
1376 break; 1544 break;
1377 default: 1545 default:
1378 break; 1546 break;
@@ -1561,7 +1729,7 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding, @@ -1561,7 +1729,7 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding,
1561 { 1729 {
1562 result += QUtil::toUTF16(codepoint); 1730 result += QUtil::toUTF16(codepoint);
1563 } 1731 }
1564 - else if ((codepoint >= 160) && (codepoint < 256) && 1732 + else if ((codepoint > 160) && (codepoint < 256) &&
1565 ((encoding == e_winansi) || (encoding == e_pdfdoc))) 1733 ((encoding == e_winansi) || (encoding == e_pdfdoc)))
1566 { 1734 {
1567 ch = static_cast<unsigned char>(codepoint & 0xff); 1735 ch = static_cast<unsigned char>(codepoint & 0xff);
@@ -1686,13 +1854,37 @@ QUtil::utf16_to_utf8(std::string const& val) @@ -1686,13 +1854,37 @@ QUtil::utf16_to_utf8(std::string const& val)
1686 std::string 1854 std::string
1687 QUtil::win_ansi_to_utf8(std::string const& val) 1855 QUtil::win_ansi_to_utf8(std::string const& val)
1688 { 1856 {
1689 - return "QXXXQ"; 1857 + std::string result;
  1858 + size_t len = val.length();
  1859 + for (unsigned int i = 0; i < len; ++i)
  1860 + {
  1861 + unsigned char ch = static_cast<unsigned char>(val.at(i));
  1862 + unsigned short val = ch;
  1863 + if ((ch >= 128) && (ch <= 160))
  1864 + {
  1865 + val = win_ansi_to_unicode[ch - 128];
  1866 + }
  1867 + result += QUtil::toUTF8(val);
  1868 + }
  1869 + return result;
1690 } 1870 }
1691 1871
1692 std::string 1872 std::string
1693 QUtil::mac_roman_to_utf8(std::string const& val) 1873 QUtil::mac_roman_to_utf8(std::string const& val)
1694 { 1874 {
1695 - return "QXXXQ"; 1875 + std::string result;
  1876 + size_t len = val.length();
  1877 + for (unsigned int i = 0; i < len; ++i)
  1878 + {
  1879 + unsigned char ch = static_cast<unsigned char>(val.at(i));
  1880 + unsigned short val = ch;
  1881 + if (ch >= 128)
  1882 + {
  1883 + val = mac_roman_to_unicode[ch - 128];
  1884 + }
  1885 + result += QUtil::toUTF8(val);
  1886 + }
  1887 + return result;
1696 } 1888 }
1697 1889
1698 std::string 1890 std::string
libtests/qtest/qutil/qutil.out
@@ -53,6 +53,10 @@ HAGOOGAMAGOOGLE: 0 @@ -53,6 +53,10 @@ HAGOOGAMAGOOGLE: 0
53 *Does * have fingers? 53 *Does * have fingers?
54 <bf>Does * have fingers? 54 <bf>Does * have fingers?
55 <c0>Does * have fingers? 55 <c0>Does * have fingers?
  56 +---- transcoding
  57 +bidirectional pdf doc done
  58 +bidirectional win ansi done
  59 +bidirectional mac roman done
56 ---- whoami 60 ---- whoami
57 quack1 61 quack1
58 quack2 62 quack2
libtests/qutil.cc
@@ -240,6 +240,41 @@ void utf8_to_ascii_test() @@ -240,6 +240,41 @@ void utf8_to_ascii_test()
240 << ">" << b.substr(1) << std::endl; 240 << ">" << b.substr(1) << std::endl;
241 } 241 }
242 242
  243 +void transcoding_test(std::string (*to_utf8)(std::string const&),
  244 + std::string (*from_utf8)(std::string const&, char),
  245 + int last, std::string unknown)
  246 +{
  247 + std::string in(" ");
  248 + std::string out;
  249 + std::string back;
  250 + for (int i = 128; i <= last; ++i)
  251 + {
  252 + in.at(0) = static_cast<unsigned char>(i);
  253 + out = (*to_utf8)(in);
  254 + std::string wanted = (out == "\xef\xbf\xbd") ? unknown : in;
  255 + back = (*from_utf8)(out, '?');
  256 + if (back != wanted)
  257 + {
  258 + std::cout << i << ": " << in << " -> " << out
  259 + << " -> " << back << " (wanted " << wanted << ")"
  260 + << std::endl;
  261 + }
  262 + }
  263 +}
  264 +
  265 +void transcoding_test()
  266 +{
  267 + transcoding_test(&QUtil::pdf_doc_to_utf8,
  268 + &QUtil::utf8_to_pdf_doc, 160, "\x9f");
  269 + std::cout << "bidirectional pdf doc done" << std::endl;
  270 + transcoding_test(&QUtil::win_ansi_to_utf8,
  271 + &QUtil::utf8_to_win_ansi, 160, "?");
  272 + std::cout << "bidirectional win ansi done" << std::endl;
  273 + transcoding_test(&QUtil::mac_roman_to_utf8,
  274 + &QUtil::utf8_to_mac_roman, 255, "?");
  275 + std::cout << "bidirectional mac roman done" << std::endl;
  276 +}
  277 +
243 void print_whoami(char const* str) 278 void print_whoami(char const* str)
244 { 279 {
245 PointerHolder<char> dup(true, QUtil::copy_string(str)); 280 PointerHolder<char> dup(true, QUtil::copy_string(str));
@@ -350,6 +385,8 @@ int main(int argc, char* argv[]) @@ -350,6 +385,8 @@ int main(int argc, char* argv[])
350 to_utf16_test(); 385 to_utf16_test();
351 std::cout << "---- utf8_to_ascii" << std::endl; 386 std::cout << "---- utf8_to_ascii" << std::endl;
352 utf8_to_ascii_test(); 387 utf8_to_ascii_test();
  388 + std::cout << "---- transcoding" << std::endl;
  389 + transcoding_test();
353 std::cout << "---- whoami" << std::endl; 390 std::cout << "---- whoami" << std::endl;
354 get_whoami_test(); 391 get_whoami_test();
355 std::cout << "---- file" << std::endl; 392 std::cout << "---- file" << std::endl;