Commit 6817ca585a44eec2c50961ba7c714b419774da36

Authored by Jay Berkenbilt
1 parent 69848546

Bidirectional transcoding for win, mac, pdf, utf8, utf16

libqpdf/QUtil.cc
... ... @@ -66,6 +66,171 @@ static unsigned short pdf_doc_to_unicode[] = {
66 66 0xfffd, // 0x9f UNDEFINED
67 67 0x20ac, // 0xa0 EURO SIGN
68 68 };
  69 +static unsigned short win_ansi_to_unicode[] = {
  70 + 0x20ac, // 0x80
  71 + 0xfffd, // 0x81
  72 + 0x201a, // 0x82
  73 + 0x0192, // 0x83
  74 + 0x201e, // 0x84
  75 + 0x2026, // 0x85
  76 + 0x2020, // 0x86
  77 + 0x2021, // 0x87
  78 + 0x02c6, // 0x88
  79 + 0x2030, // 0x89
  80 + 0x0160, // 0x8a
  81 + 0x2039, // 0x8b
  82 + 0x0152, // 0x8c
  83 + 0xfffd, // 0x8d
  84 + 0x017d, // 0x8e
  85 + 0xfffd, // 0x8f
  86 + 0xfffd, // 0x90
  87 + 0x2018, // 0x91
  88 + 0x2019, // 0x92
  89 + 0x201c, // 0x93
  90 + 0x201d, // 0x94
  91 + 0x2022, // 0x95
  92 + 0x2013, // 0x96
  93 + 0x2014, // 0x97
  94 + 0x0303, // 0x98
  95 + 0x2122, // 0x99
  96 + 0x0161, // 0x9a
  97 + 0x203a, // 0x9b
  98 + 0x0153, // 0x9c
  99 + 0xfffd, // 0x9d
  100 + 0x017e, // 0x9e
  101 + 0x0178, // 0x9f
  102 + 0x00a0, // 0xa0
  103 +};
  104 +static unsigned short mac_roman_to_unicode[] = {
  105 + 0x00c4, // 0x80
  106 + 0x00c5, // 0x81
  107 + 0x00c7, // 0x82
  108 + 0x00c9, // 0x83
  109 + 0x00d1, // 0x84
  110 + 0x00d6, // 0x85
  111 + 0x00dc, // 0x86
  112 + 0x00e1, // 0x87
  113 + 0x00e0, // 0x88
  114 + 0x00e2, // 0x89
  115 + 0x00e4, // 0x8a
  116 + 0x00e3, // 0x8b
  117 + 0x00e5, // 0x8c
  118 + 0x00e7, // 0x8d
  119 + 0x00e9, // 0x8e
  120 + 0x00e8, // 0x8f
  121 + 0x00ea, // 0x90
  122 + 0x00eb, // 0x91
  123 + 0x00ed, // 0x92
  124 + 0x00ec, // 0x93
  125 + 0x00ee, // 0x94
  126 + 0x00ef, // 0x95
  127 + 0x00f1, // 0x96
  128 + 0x00f3, // 0x97
  129 + 0x00f2, // 0x98
  130 + 0x00f4, // 0x99
  131 + 0x00f6, // 0x9a
  132 + 0x00f5, // 0x9b
  133 + 0x00fa, // 0x9c
  134 + 0x00f9, // 0x9d
  135 + 0x00fb, // 0x9e
  136 + 0x00fc, // 0x9f
  137 + 0x2020, // 0xa0
  138 + 0x00b0, // 0xa1
  139 + 0x00a2, // 0xa2
  140 + 0x00a3, // 0xa3
  141 + 0x00a7, // 0xa4
  142 + 0x2022, // 0xa5
  143 + 0x00b6, // 0xa6
  144 + 0x00df, // 0xa7
  145 + 0x00ae, // 0xa8
  146 + 0x00a9, // 0xa9
  147 + 0x2122, // 0xaa
  148 + 0x0301, // 0xab
  149 + 0x0308, // 0xac
  150 + 0xfffd, // 0xad
  151 + 0x00c6, // 0xae
  152 + 0x00d8, // 0xaf
  153 + 0xfffd, // 0xb0
  154 + 0x00b1, // 0xb1
  155 + 0xfffd, // 0xb2
  156 + 0xfffd, // 0xb3
  157 + 0x00a5, // 0xb4
  158 + 0x03bc, // 0xb5
  159 + 0xfffd, // 0xb6
  160 + 0xfffd, // 0xb7
  161 + 0xfffd, // 0xb8
  162 + 0xfffd, // 0xb9
  163 + 0xfffd, // 0xba
  164 + 0x1d43, // 0xbb
  165 + 0x1d52, // 0xbc
  166 + 0xfffd, // 0xbd
  167 + 0x00e6, // 0xbe
  168 + 0x00f8, // 0xbf
  169 + 0x00bf, // 0xc0
  170 + 0x00a1, // 0xc1
  171 + 0x00ac, // 0xc2
  172 + 0xfffd, // 0xc3
  173 + 0x0192, // 0xc4
  174 + 0xfffd, // 0xc5
  175 + 0xfffd, // 0xc6
  176 + 0x00ab, // 0xc7
  177 + 0x00bb, // 0xc8
  178 + 0x2026, // 0xc9
  179 + 0xfffd, // 0xca
  180 + 0x00c0, // 0xcb
  181 + 0x00c3, // 0xcc
  182 + 0x00d5, // 0xcd
  183 + 0x0152, // 0xce
  184 + 0x0153, // 0xcf
  185 + 0x2013, // 0xd0
  186 + 0x2014, // 0xd1
  187 + 0x201c, // 0xd2
  188 + 0x201d, // 0xd3
  189 + 0x2018, // 0xd4
  190 + 0x2019, // 0xd5
  191 + 0x00f7, // 0xd6
  192 + 0xfffd, // 0xd7
  193 + 0x00ff, // 0xd8
  194 + 0x0178, // 0xd9
  195 + 0x2044, // 0xda
  196 + 0x00a4, // 0xdb
  197 + 0x2039, // 0xdc
  198 + 0x203a, // 0xdd
  199 + 0xfb01, // 0xde
  200 + 0xfb02, // 0xdf
  201 + 0x2021, // 0xe0
  202 + 0x00b7, // 0xe1
  203 + 0x201a, // 0xe2
  204 + 0x201e, // 0xe3
  205 + 0x2030, // 0xe4
  206 + 0x00c2, // 0xe5
  207 + 0x00ca, // 0xe6
  208 + 0x00c1, // 0xe7
  209 + 0x00cb, // 0xe8
  210 + 0x00c8, // 0xe9
  211 + 0x00cd, // 0xea
  212 + 0x00ce, // 0xeb
  213 + 0x00cf, // 0xec
  214 + 0x00cc, // 0xed
  215 + 0x00d3, // 0xee
  216 + 0x00d4, // 0xef
  217 + 0xfffd, // 0xf0
  218 + 0x00d2, // 0xf1
  219 + 0x00da, // 0xf2
  220 + 0x00db, // 0xf3
  221 + 0x00d9, // 0xf4
  222 + 0x0131, // 0xf5
  223 + 0x02c6, // 0xf6
  224 + 0x0303, // 0xf7
  225 + 0x0304, // 0xf8
  226 + 0x0306, // 0xf9
  227 + 0x0307, // 0xfa
  228 + 0x030a, // 0xfb
  229 + 0x0327, // 0xfc
  230 + 0x030b, // 0xfd
  231 + 0x0328, // 0xfe
  232 + 0x02c7, // 0xff
  233 +};
69 234  
70 235 std::string
71 236 QUtil::int_to_string(long long num, int length)
... ... @@ -946,23 +1111,17 @@ encode_winansi(unsigned long codepoint)
946 1111 case 0x20ac:
947 1112 ch = 0x80;
948 1113 break;
949   - case 0x152:
950   - ch = 0x8c;
951   - break;
952   - case 0x160:
953   - ch = 0x8a;
954   - break;
955   - case 0x178:
956   - ch = 0x9f;
  1114 + case 0x201a:
  1115 + ch = 0x82;
957 1116 break;
958   - case 0x17d:
959   - ch = 0x8e;
  1117 + case 0x192:
  1118 + ch = 0x83;
960 1119 break;
961   - case 0x2022:
962   - ch = 0x95;
  1120 + case 0x201e:
  1121 + ch = 0x84;
963 1122 break;
964   - case 0x2c6:
965   - ch = 0x88;
  1123 + case 0x2026:
  1124 + ch = 0x85;
966 1125 break;
967 1126 case 0x2020:
968 1127 ch = 0x86;
... ... @@ -970,32 +1129,29 @@ encode_winansi(unsigned long codepoint)
970 1129 case 0x2021:
971 1130 ch = 0x87;
972 1131 break;
973   - case 0x2026:
974   - ch = 0x85;
975   - break;
976   - case 0x2014:
977   - ch = 0x97;
  1132 + case 0x2c6:
  1133 + ch = 0x88;
978 1134 break;
979   - case 0x2013:
980   - ch = 0x96;
  1135 + case 0x2030:
  1136 + ch = 0x89;
981 1137 break;
982   - case 0x192:
983   - ch = 0x83;
  1138 + case 0x160:
  1139 + ch = 0x8a;
984 1140 break;
985 1141 case 0x2039:
986 1142 ch = 0x8b;
987 1143 break;
988   - case 0x203a:
989   - ch = 0x9b;
  1144 + case 0x152:
  1145 + ch = 0x8c;
990 1146 break;
991   - case 0x153:
992   - ch = 0x9c;
  1147 + case 0x17d:
  1148 + ch = 0x8e;
993 1149 break;
994   - case 0x2030:
995   - ch = 0x89;
  1150 + case 0x2018:
  1151 + ch = 0x91;
996 1152 break;
997   - case 0x201e:
998   - ch = 0x84;
  1153 + case 0x2019:
  1154 + ch = 0x92;
999 1155 break;
1000 1156 case 0x201c:
1001 1157 ch = 0x93;
... ... @@ -1003,17 +1159,14 @@ encode_winansi(unsigned long codepoint)
1003 1159 case 0x201d:
1004 1160 ch = 0x94;
1005 1161 break;
1006   - case 0x2018:
1007   - ch = 0x91;
1008   - break;
1009   - case 0x2019:
1010   - ch = 0x92;
  1162 + case 0x2022:
  1163 + ch = 0x95;
1011 1164 break;
1012   - case 0x201a:
1013   - ch = 0x82;
  1165 + case 0x2013:
  1166 + ch = 0x96;
1014 1167 break;
1015   - case 0x161:
1016   - ch = 0x9a;
  1168 + case 0x2014:
  1169 + ch = 0x97;
1017 1170 break;
1018 1171 case 0x303:
1019 1172 ch = 0x98;
... ... @@ -1021,9 +1174,24 @@ encode_winansi(unsigned long codepoint)
1021 1174 case 0x2122:
1022 1175 ch = 0x99;
1023 1176 break;
  1177 + case 0x161:
  1178 + ch = 0x9a;
  1179 + break;
  1180 + case 0x203a:
  1181 + ch = 0x9b;
  1182 + break;
  1183 + case 0x153:
  1184 + ch = 0x9c;
  1185 + break;
1024 1186 case 0x17e:
1025 1187 ch = 0x9e;
1026 1188 break;
  1189 + case 0x178:
  1190 + ch = 0x9f;
  1191 + break;
  1192 + case 0xa0:
  1193 + ch = 0xa0;
  1194 + break;
1027 1195 default:
1028 1196 break;
1029 1197 }
... ... @@ -1038,290 +1206,209 @@ encode_macroman(unsigned long codepoint)
1038 1206 unsigned char ch = '\0';
1039 1207 switch (codepoint)
1040 1208 {
1041   - case 0xc6:
1042   - ch = 0xae;
1043   - break;
1044   - case 0xc1:
1045   - ch = 0xe7;
1046   - break;
1047   - case 0xc2:
1048   - ch = 0xe5;
1049   - break;
1050 1209 case 0xc4:
1051 1210 ch = 0x80;
1052 1211 break;
1053   - case 0xc0:
1054   - ch = 0xcb;
1055   - break;
1056 1212 case 0xc5:
1057 1213 ch = 0x81;
1058 1214 break;
1059   - case 0xc3:
1060   - ch = 0xcc;
1061   - break;
1062 1215 case 0xc7:
1063 1216 ch = 0x82;
1064 1217 break;
1065 1218 case 0xc9:
1066 1219 ch = 0x83;
1067 1220 break;
1068   - case 0xca:
1069   - ch = 0xe6;
1070   - break;
1071   - case 0xcb:
1072   - ch = 0xe8;
1073   - break;
1074   - case 0xc8:
1075   - ch = 0xe9;
1076   - break;
1077   - case 0xcd:
1078   - ch = 0xea;
1079   - break;
1080   - case 0xce:
1081   - ch = 0xeb;
1082   - break;
1083   - case 0xcf:
1084   - ch = 0xec;
1085   - break;
1086   - case 0xcc:
1087   - ch = 0xed;
1088   - break;
1089 1221 case 0xd1:
1090 1222 ch = 0x84;
1091 1223 break;
1092   - case 0x152:
1093   - ch = 0xce;
1094   - break;
1095   - case 0xd3:
1096   - ch = 0xee;
1097   - break;
1098   - case 0xd4:
1099   - ch = 0xef;
1100   - break;
1101 1224 case 0xd6:
1102 1225 ch = 0x85;
1103 1226 break;
1104   - case 0xd2:
1105   - ch = 0xf1;
1106   - break;
1107   - case 0xd8:
1108   - ch = 0xaf;
1109   - break;
1110   - case 0xd5:
1111   - ch = 0xcd;
1112   - break;
1113   - case 0xda:
1114   - ch = 0xf2;
1115   - break;
1116   - case 0xdb:
1117   - ch = 0xf3;
1118   - break;
1119 1227 case 0xdc:
1120 1228 ch = 0x86;
1121 1229 break;
1122   - case 0xd9:
1123   - ch = 0xf4;
1124   - break;
1125   - case 0x178:
1126   - ch = 0xd9;
1127   - break;
1128 1230 case 0xe1:
1129 1231 ch = 0x87;
1130 1232 break;
  1233 + case 0xe0:
  1234 + ch = 0x88;
  1235 + break;
1131 1236 case 0xe2:
1132 1237 ch = 0x89;
1133 1238 break;
1134   - case 0x301:
1135   - ch = 0xab;
1136   - break;
1137 1239 case 0xe4:
1138 1240 ch = 0x8a;
1139 1241 break;
1140   - case 0xe6:
1141   - ch = 0xbe;
1142   - break;
1143   - case 0xe0:
1144   - ch = 0x88;
1145   - break;
1146   - case 0xe5:
1147   - ch = 0x8c;
1148   - break;
1149 1242 case 0xe3:
1150 1243 ch = 0x8b;
1151 1244 break;
1152   - case 0x306:
1153   - ch = 0xf9;
1154   - break;
1155   - case 0x2022:
1156   - ch = 0xa5;
1157   - break;
1158   - case 0x2c7:
1159   - ch = 0xff;
  1245 + case 0xe5:
  1246 + ch = 0x8c;
1160 1247 break;
1161 1248 case 0xe7:
1162 1249 ch = 0x8d;
1163 1250 break;
1164   - case 0x327:
1165   - ch = 0xfc;
  1251 + case 0xe9:
  1252 + ch = 0x8e;
1166 1253 break;
1167   - case 0xa2:
1168   - ch = 0xa2;
  1254 + case 0xe8:
  1255 + ch = 0x8f;
1169 1256 break;
1170   - case 0x2c6:
1171   - ch = 0xf6;
  1257 + case 0xea:
  1258 + ch = 0x90;
1172 1259 break;
1173   - case 0xa9:
1174   - ch = 0xa9;
  1260 + case 0xeb:
  1261 + ch = 0x91;
1175 1262 break;
1176   - case 0xa4:
1177   - ch = 0xdb;
  1263 + case 0xed:
  1264 + ch = 0x92;
1178 1265 break;
1179   - case 0x2020:
1180   - ch = 0xa0;
  1266 + case 0xec:
  1267 + ch = 0x93;
1181 1268 break;
1182   - case 0x2021:
1183   - ch = 0xe0;
  1269 + case 0xee:
  1270 + ch = 0x94;
1184 1271 break;
1185   - case 0xb0:
1186   - ch = 0xa1;
  1272 + case 0xef:
  1273 + ch = 0x95;
1187 1274 break;
1188   - case 0x308:
1189   - ch = 0xac;
  1275 + case 0xf1:
  1276 + ch = 0x96;
1190 1277 break;
1191   - case 0xf7:
1192   - ch = 0xd6;
  1278 + case 0xf3:
  1279 + ch = 0x97;
1193 1280 break;
1194   - case 0x307:
1195   - ch = 0xfa;
  1281 + case 0xf2:
  1282 + ch = 0x98;
1196 1283 break;
1197   - case 0x131:
1198   - ch = 0xf5;
  1284 + case 0xf4:
  1285 + ch = 0x99;
1199 1286 break;
1200   - case 0xe9:
1201   - ch = 0x8e;
  1287 + case 0xf6:
  1288 + ch = 0x9a;
1202 1289 break;
1203   - case 0xea:
1204   - ch = 0x90;
  1290 + case 0xf5:
  1291 + ch = 0x9b;
1205 1292 break;
1206   - case 0xeb:
1207   - ch = 0x91;
  1293 + case 0xfa:
  1294 + ch = 0x9c;
1208 1295 break;
1209   - case 0xe8:
1210   - ch = 0x8f;
  1296 + case 0xf9:
  1297 + ch = 0x9d;
1211 1298 break;
1212   - case 0x2026:
1213   - ch = 0xc9;
  1299 + case 0xfb:
  1300 + ch = 0x9e;
1214 1301 break;
1215   - case 0x2014:
1216   - ch = 0xd1;
  1302 + case 0xfc:
  1303 + ch = 0x9f;
1217 1304 break;
1218   - case 0x2013:
1219   - ch = 0xd0;
  1305 + case 0x2020:
  1306 + ch = 0xa0;
1220 1307 break;
1221   - case 0xa1:
1222   - ch = 0xc1;
  1308 + case 0xb0:
  1309 + ch = 0xa1;
1223 1310 break;
1224   - case 0xfb01:
1225   - ch = 0xde;
  1311 + case 0xa2:
  1312 + ch = 0xa2;
1226 1313 break;
1227   - case 0xfb02:
1228   - ch = 0xdf;
  1314 + case 0xa3:
  1315 + ch = 0xa3;
1229 1316 break;
1230   - case 0x192:
1231   - ch = 0xc4;
  1317 + case 0xa7:
  1318 + ch = 0xa4;
1232 1319 break;
1233   - case 0x2044:
1234   - ch = 0xda;
  1320 + case 0x2022:
  1321 + ch = 0xa5;
  1322 + break;
  1323 + case 0xb6:
  1324 + ch = 0xa6;
1235 1325 break;
1236 1326 case 0xdf:
1237 1327 ch = 0xa7;
1238 1328 break;
1239   - case 0xab:
1240   - ch = 0xc7;
1241   - break;
1242   - case 0xbb:
1243   - ch = 0xc8;
1244   - break;
1245   - case 0x2039:
1246   - ch = 0xdc;
  1329 + case 0xae:
  1330 + ch = 0xa8;
1247 1331 break;
1248   - case 0x203a:
1249   - ch = 0xdd;
  1332 + case 0xa9:
  1333 + ch = 0xa9;
1250 1334 break;
1251   - case 0x30b:
1252   - ch = 0xfd;
  1335 + case 0x2122:
  1336 + ch = 0xaa;
1253 1337 break;
1254   - case 0xed:
1255   - ch = 0x92;
  1338 + case 0x301:
  1339 + ch = 0xab;
1256 1340 break;
1257   - case 0xee:
1258   - ch = 0x94;
  1341 + case 0x308:
  1342 + ch = 0xac;
1259 1343 break;
1260   - case 0xef:
1261   - ch = 0x95;
  1344 + case 0xc6:
  1345 + ch = 0xae;
1262 1346 break;
1263   - case 0xec:
1264   - ch = 0x93;
  1347 + case 0xd8:
  1348 + ch = 0xaf;
1265 1349 break;
1266   - case 0xac:
1267   - ch = 0xc2;
  1350 + case 0xb1:
  1351 + ch = 0xb1;
1268 1352 break;
1269   - case 0x304:
1270   - ch = 0xf8;
  1353 + case 0xa5:
  1354 + ch = 0xb4;
1271 1355 break;
1272 1356 case 0x3bc:
1273 1357 ch = 0xb5;
1274 1358 break;
1275   - case 0xf1:
1276   - ch = 0x96;
  1359 + case 0x1d43:
  1360 + ch = 0xbb;
1277 1361 break;
1278   - case 0xf3:
1279   - ch = 0x97;
  1362 + case 0x1d52:
  1363 + ch = 0xbc;
1280 1364 break;
1281   - case 0xf4:
1282   - ch = 0x99;
  1365 + case 0xe6:
  1366 + ch = 0xbe;
1283 1367 break;
1284   - case 0xf6:
1285   - ch = 0x9a;
  1368 + case 0xf8:
  1369 + ch = 0xbf;
1286 1370 break;
1287   - case 0x153:
1288   - ch = 0xcf;
  1371 + case 0xbf:
  1372 + ch = 0xc0;
1289 1373 break;
1290   - case 0x328:
1291   - ch = 0xfe;
  1374 + case 0xa1:
  1375 + ch = 0xc1;
1292 1376 break;
1293   - case 0xf2:
1294   - ch = 0x98;
  1377 + case 0xac:
  1378 + ch = 0xc2;
1295 1379 break;
1296   - case 0x1d43:
1297   - ch = 0xbb;
  1380 + case 0x192:
  1381 + ch = 0xc4;
1298 1382 break;
1299   - case 0x1d52:
1300   - ch = 0xbc;
  1383 + case 0xab:
  1384 + ch = 0xc7;
1301 1385 break;
1302   - case 0xf8:
1303   - ch = 0xbf;
  1386 + case 0xbb:
  1387 + ch = 0xc8;
1304 1388 break;
1305   - case 0xf5:
1306   - ch = 0x9b;
  1389 + case 0x2026:
  1390 + ch = 0xc9;
1307 1391 break;
1308   - case 0xb6:
1309   - ch = 0xa6;
  1392 + case 0xc0:
  1393 + ch = 0xcb;
1310 1394 break;
1311   - case 0xb7:
1312   - ch = 0xe1;
  1395 + case 0xc3:
  1396 + ch = 0xcc;
1313 1397 break;
1314   - case 0x2030:
1315   - ch = 0xe4;
  1398 + case 0xd5:
  1399 + ch = 0xcd;
1316 1400 break;
1317   - case 0xb1:
1318   - ch = 0xb1;
  1401 + case 0x152:
  1402 + ch = 0xce;
1319 1403 break;
1320   - case 0xbf:
1321   - ch = 0xc0;
  1404 + case 0x153:
  1405 + ch = 0xcf;
1322 1406 break;
1323   - case 0x201e:
1324   - ch = 0xe3;
  1407 + case 0x2013:
  1408 + ch = 0xd0;
  1409 + break;
  1410 + case 0x2014:
  1411 + ch = 0xd1;
1325 1412 break;
1326 1413 case 0x201c:
1327 1414 ch = 0xd2;
... ... @@ -1335,44 +1422,125 @@ encode_macroman(unsigned long codepoint)
1335 1422 case 0x2019:
1336 1423 ch = 0xd5;
1337 1424 break;
  1425 + case 0xf7:
  1426 + ch = 0xd6;
  1427 + break;
  1428 + case 0xff:
  1429 + ch = 0xd8;
  1430 + break;
  1431 + case 0x178:
  1432 + ch = 0xd9;
  1433 + break;
  1434 + case 0x2044:
  1435 + ch = 0xda;
  1436 + break;
  1437 + case 0xa4:
  1438 + ch = 0xdb;
  1439 + break;
  1440 + case 0x2039:
  1441 + ch = 0xdc;
  1442 + break;
  1443 + case 0x203a:
  1444 + ch = 0xdd;
  1445 + break;
  1446 + case 0xfb01:
  1447 + ch = 0xde;
  1448 + break;
  1449 + case 0xfb02:
  1450 + ch = 0xdf;
  1451 + break;
  1452 + case 0x2021:
  1453 + ch = 0xe0;
  1454 + break;
  1455 + case 0xb7:
  1456 + ch = 0xe1;
  1457 + break;
1338 1458 case 0x201a:
1339 1459 ch = 0xe2;
1340 1460 break;
1341   - case 0xae:
1342   - ch = 0xa8;
  1461 + case 0x201e:
  1462 + ch = 0xe3;
1343 1463 break;
1344   - case 0x30a:
1345   - ch = 0xfb;
  1464 + case 0x2030:
  1465 + ch = 0xe4;
1346 1466 break;
1347   - case 0xa7:
1348   - ch = 0xa4;
  1467 + case 0xc2:
  1468 + ch = 0xe5;
1349 1469 break;
1350   - case 0xa3:
1351   - ch = 0xa3;
  1470 + case 0xca:
  1471 + ch = 0xe6;
  1472 + break;
  1473 + case 0xc1:
  1474 + ch = 0xe7;
  1475 + break;
  1476 + case 0xcb:
  1477 + ch = 0xe8;
  1478 + break;
  1479 + case 0xc8:
  1480 + ch = 0xe9;
  1481 + break;
  1482 + case 0xcd:
  1483 + ch = 0xea;
  1484 + break;
  1485 + case 0xce:
  1486 + ch = 0xeb;
  1487 + break;
  1488 + case 0xcf:
  1489 + ch = 0xec;
  1490 + break;
  1491 + case 0xcc:
  1492 + ch = 0xed;
  1493 + break;
  1494 + case 0xd3:
  1495 + ch = 0xee;
  1496 + break;
  1497 + case 0xd4:
  1498 + ch = 0xef;
  1499 + break;
  1500 + case 0xd2:
  1501 + ch = 0xf1;
  1502 + break;
  1503 + case 0xda:
  1504 + ch = 0xf2;
  1505 + break;
  1506 + case 0xdb:
  1507 + ch = 0xf3;
  1508 + break;
  1509 + case 0xd9:
  1510 + ch = 0xf4;
  1511 + break;
  1512 + case 0x131:
  1513 + ch = 0xf5;
  1514 + break;
  1515 + case 0x2c6:
  1516 + ch = 0xf6;
1352 1517 break;
1353 1518 case 0x303:
1354 1519 ch = 0xf7;
1355 1520 break;
1356   - case 0x2122:
1357   - ch = 0xaa;
  1521 + case 0x304:
  1522 + ch = 0xf8;
1358 1523 break;
1359   - case 0xfa:
1360   - ch = 0x9c;
  1524 + case 0x306:
  1525 + ch = 0xf9;
1361 1526 break;
1362   - case 0xfb:
1363   - ch = 0x9e;
  1527 + case 0x307:
  1528 + ch = 0xfa;
1364 1529 break;
1365   - case 0xfc:
1366   - ch = 0x9f;
  1530 + case 0x30a:
  1531 + ch = 0xfb;
1367 1532 break;
1368   - case 0xf9:
1369   - ch = 0x9d;
  1533 + case 0x327:
  1534 + ch = 0xfc;
1370 1535 break;
1371   - case 0xff:
1372   - ch = 0xd8;
  1536 + case 0x30b:
  1537 + ch = 0xfd;
1373 1538 break;
1374   - case 0xa5:
1375   - ch = 0xb4;
  1539 + case 0x328:
  1540 + ch = 0xfe;
  1541 + break;
  1542 + case 0x2c7:
  1543 + ch = 0xff;
1376 1544 break;
1377 1545 default:
1378 1546 break;
... ... @@ -1561,7 +1729,7 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding,
1561 1729 {
1562 1730 result += QUtil::toUTF16(codepoint);
1563 1731 }
1564   - else if ((codepoint >= 160) && (codepoint < 256) &&
  1732 + else if ((codepoint > 160) && (codepoint < 256) &&
1565 1733 ((encoding == e_winansi) || (encoding == e_pdfdoc)))
1566 1734 {
1567 1735 ch = static_cast<unsigned char>(codepoint & 0xff);
... ... @@ -1686,13 +1854,37 @@ QUtil::utf16_to_utf8(std::string const& val)
1686 1854 std::string
1687 1855 QUtil::win_ansi_to_utf8(std::string const& val)
1688 1856 {
1689   - return "QXXXQ";
  1857 + std::string result;
  1858 + size_t len = val.length();
  1859 + for (unsigned int i = 0; i < len; ++i)
  1860 + {
  1861 + unsigned char ch = static_cast<unsigned char>(val.at(i));
  1862 + unsigned short val = ch;
  1863 + if ((ch >= 128) && (ch <= 160))
  1864 + {
  1865 + val = win_ansi_to_unicode[ch - 128];
  1866 + }
  1867 + result += QUtil::toUTF8(val);
  1868 + }
  1869 + return result;
1690 1870 }
1691 1871  
1692 1872 std::string
1693 1873 QUtil::mac_roman_to_utf8(std::string const& val)
1694 1874 {
1695   - return "QXXXQ";
  1875 + std::string result;
  1876 + size_t len = val.length();
  1877 + for (unsigned int i = 0; i < len; ++i)
  1878 + {
  1879 + unsigned char ch = static_cast<unsigned char>(val.at(i));
  1880 + unsigned short val = ch;
  1881 + if (ch >= 128)
  1882 + {
  1883 + val = mac_roman_to_unicode[ch - 128];
  1884 + }
  1885 + result += QUtil::toUTF8(val);
  1886 + }
  1887 + return result;
1696 1888 }
1697 1889  
1698 1890 std::string
... ...
libtests/qtest/qutil/qutil.out
... ... @@ -53,6 +53,10 @@ HAGOOGAMAGOOGLE: 0
53 53 *Does * have fingers?
54 54 <bf>Does * have fingers?
55 55 <c0>Does * have fingers?
  56 +---- transcoding
  57 +bidirectional pdf doc done
  58 +bidirectional win ansi done
  59 +bidirectional mac roman done
56 60 ---- whoami
57 61 quack1
58 62 quack2
... ...
libtests/qutil.cc
... ... @@ -240,6 +240,41 @@ void utf8_to_ascii_test()
240 240 << ">" << b.substr(1) << std::endl;
241 241 }
242 242  
  243 +void transcoding_test(std::string (*to_utf8)(std::string const&),
  244 + std::string (*from_utf8)(std::string const&, char),
  245 + int last, std::string unknown)
  246 +{
  247 + std::string in(" ");
  248 + std::string out;
  249 + std::string back;
  250 + for (int i = 128; i <= last; ++i)
  251 + {
  252 + in.at(0) = static_cast<unsigned char>(i);
  253 + out = (*to_utf8)(in);
  254 + std::string wanted = (out == "\xef\xbf\xbd") ? unknown : in;
  255 + back = (*from_utf8)(out, '?');
  256 + if (back != wanted)
  257 + {
  258 + std::cout << i << ": " << in << " -> " << out
  259 + << " -> " << back << " (wanted " << wanted << ")"
  260 + << std::endl;
  261 + }
  262 + }
  263 +}
  264 +
  265 +void transcoding_test()
  266 +{
  267 + transcoding_test(&QUtil::pdf_doc_to_utf8,
  268 + &QUtil::utf8_to_pdf_doc, 160, "\x9f");
  269 + std::cout << "bidirectional pdf doc done" << std::endl;
  270 + transcoding_test(&QUtil::win_ansi_to_utf8,
  271 + &QUtil::utf8_to_win_ansi, 160, "?");
  272 + std::cout << "bidirectional win ansi done" << std::endl;
  273 + transcoding_test(&QUtil::mac_roman_to_utf8,
  274 + &QUtil::utf8_to_mac_roman, 255, "?");
  275 + std::cout << "bidirectional mac roman done" << std::endl;
  276 +}
  277 +
243 278 void print_whoami(char const* str)
244 279 {
245 280 PointerHolder<char> dup(true, QUtil::copy_string(str));
... ... @@ -350,6 +385,8 @@ int main(int argc, char* argv[])
350 385 to_utf16_test();
351 386 std::cout << "---- utf8_to_ascii" << std::endl;
352 387 utf8_to_ascii_test();
  388 + std::cout << "---- transcoding" << std::endl;
  389 + transcoding_test();
353 390 std::cout << "---- whoami" << std::endl;
354 391 get_whoami_test();
355 392 std::cout << "---- file" << std::endl;
... ...