Commit 011695dfdf52e7a83f0eeceb85d0d2c06e7df7da
1 parent
4ccb2991
Support Unicode in filenames (fixes #298)
Showing
5 changed files
with
59 additions
and
3 deletions
ChangeLog
| 1 | 1 | 2019-04-20 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | |
| 3 | + * Handle Unicode characters in filenames. The changes to support | |
| 4 | + Unicode on the CLI in Windows broke Unicode filenames. Fixes #298. | |
| 5 | + | |
| 3 | 6 | * Slightly tighten logic that determines whether an object is a |
| 4 | 7 | page. The previous logic was sometimes failing to preserve |
| 5 | 8 | annotations because they were passing the overly loose test for | ... | ... |
libqpdf/QUtil.cc
| ... | ... | @@ -354,11 +354,42 @@ FILE* |
| 354 | 354 | QUtil::safe_fopen(char const* filename, char const* mode) |
| 355 | 355 | { |
| 356 | 356 | FILE* f = 0; |
| 357 | +#ifdef _WIN32 | |
| 358 | + // Convert the UTF-8 encoded filename argument to wchar_t*. First, | |
| 359 | + // convert to UTF-16, then to wchar_t*. Note that u16 will start | |
| 360 | + // with the UTF-16 byte order mark (BOM), which we skip. | |
| 361 | + std::string u16 = utf8_to_utf16(filename); | |
| 362 | + size_t len = u16.length(); | |
| 363 | + size_t wlen = (len / 2) - 1; | |
| 364 | + PointerHolder<wchar_t> wfilenamep(true, new wchar_t[wlen + 1]); | |
| 365 | + wchar_t* wfilename = wfilenamep.getPointer(); | |
| 366 | + wfilename[wlen] = 0; | |
| 367 | + for (unsigned int i = 2; i < len; i += 2) | |
| 368 | + { | |
| 369 | + wfilename[(i/2) - 1] = | |
| 370 | + static_cast<wchar_t>( | |
| 371 | + (static_cast<unsigned char>(u16.at(i)) << 8) + | |
| 372 | + static_cast<unsigned char>(u16.at(i+1))); | |
| 373 | + } | |
| 374 | + PointerHolder<wchar_t> wmodep(true, new wchar_t[strlen(mode) + 1]); | |
| 375 | + wchar_t* wmode = wmodep.getPointer(); | |
| 376 | + wmode[strlen(mode)] = 0; | |
| 377 | + for (size_t i = 0; i < strlen(mode); ++i) | |
| 378 | + { | |
| 379 | + wmode[i] = mode[i]; | |
| 380 | + } | |
| 381 | + | |
| 357 | 382 | #ifdef _MSC_VER |
| 358 | - errno_t err = fopen_s(&f, filename, mode); | |
| 383 | + errno_t err = _wfopen_s(&f, wfilename, wmode); | |
| 359 | 384 | if (err != 0) |
| 360 | 385 | { |
| 361 | 386 | errno = err; |
| 387 | + } | |
| 388 | +#else | |
| 389 | + f = _wfopen(wfilename, wmode); | |
| 390 | +#endif | |
| 391 | + if (f == 0) | |
| 392 | + { | |
| 362 | 393 | throw_system_error(std::string("open ") + filename); |
| 363 | 394 | } |
| 364 | 395 | #else | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -135,7 +135,7 @@ foreach my $c (@completion_tests) |
| 135 | 135 | show_ntests(); |
| 136 | 136 | # ---------- |
| 137 | 137 | $td->notify("--- Argument Parsing ---"); |
| 138 | -$n_tests += 6; | |
| 138 | +$n_tests += 8; | |
| 139 | 139 | |
| 140 | 140 | $td->runtest("required argument", |
| 141 | 141 | {$td->COMMAND => "qpdf --password minimal.pdf"}, |
| ... | ... | @@ -167,6 +167,16 @@ $td->runtest("extra overlay filename", |
| 167 | 167 | {$td->REGEXP => ".*overlay file already specified.*", |
| 168 | 168 | $td->EXIT_STATUS => 2}, |
| 169 | 169 | $td->NORMALIZE_NEWLINES); |
| 170 | +foreach my $d (['auto-ü', 1], ['auto-öπ', 2]) | |
| 171 | +{ | |
| 172 | + my ($u, $n) = @$d; | |
| 173 | + copy('minimal.pdf', "$u.pdf"); | |
| 174 | + $td->runtest("unicode filename $u", | |
| 175 | + {$td->COMMAND => "qpdf --check $u.pdf"}, | |
| 176 | + {$td->FILE => "check-unicode-filename-$n.out", | |
| 177 | + $td->EXIT_STATUS => 0}, | |
| 178 | + $td->NORMALIZE_NEWLINES); | |
| 179 | +} | |
| 170 | 180 | |
| 171 | 181 | show_ntests(); |
| 172 | 182 | # ---------- |
| ... | ... | @@ -4093,5 +4103,5 @@ sub get_md5_checksum |
| 4093 | 4103 | sub cleanup |
| 4094 | 4104 | { |
| 4095 | 4105 | system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache"); |
| 4096 | - system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf"); | |
| 4106 | + system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf auto-*.pdf"); | |
| 4097 | 4107 | } | ... | ... |
qpdf/qtest/qpdf/check-unicode-filename-1.out
0 → 100644