Commit 011695dfdf52e7a83f0eeceb85d0d2c06e7df7da
1 parent: 4ccb2991
Support Unicode in filenames (fixes #298)
Showing 5 changed files with 59 additions and 3 deletions
ChangeLog
| 1 | 2019-04-20 Jay Berkenbilt <ejb@ql.org> | 1 | 2019-04-20 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | + * Handle Unicode characters in filenames. The changes to support | ||
| 4 | + Unicode on the CLI in Windows broke Unicode filenames. Fixes #298. | ||
| 5 | + | ||
| 3 | * Slightly tighten logic that determines whether an object is a | 6 | * Slightly tighten logic that determines whether an object is a |
| 4 | page. The previous logic was sometimes failing to preserve | 7 | page. The previous logic was sometimes failing to preserve |
| 5 | annotations because they were passing the overly loose test for | 8 | annotations because they were passing the overly loose test for |
libqpdf/QUtil.cc
| @@ -354,11 +354,42 @@ FILE* | @@ -354,11 +354,42 @@ FILE* | ||
| 354 | QUtil::safe_fopen(char const* filename, char const* mode) | 354 | QUtil::safe_fopen(char const* filename, char const* mode) |
| 355 | { | 355 | { |
| 356 | FILE* f = 0; | 356 | FILE* f = 0; |
| 357 | +#ifdef _WIN32 | ||
| 358 | + // Convert the utf-8 encoded filename argument to wchar_t*. First, | ||
| 359 | + // convert to utf16, then to wchar_t*. Note that u16 will start | ||
| 360 | + // with the UTF16 marker, which we skip. | ||
| 361 | + std::string u16 = utf8_to_utf16(filename); | ||
| 362 | + size_t len = u16.length(); | ||
| 363 | + size_t wlen = (len / 2) - 1; | ||
| 364 | + PointerHolder<wchar_t> wfilenamep(true, new wchar_t[wlen + 1]); | ||
| 365 | + wchar_t* wfilename = wfilenamep.getPointer(); | ||
| 366 | + wfilename[wlen] = 0; | ||
| 367 | + for (unsigned int i = 2; i < len; i += 2) | ||
| 368 | + { | ||
| 369 | + wfilename[(i/2) - 1] = | ||
| 370 | + static_cast<wchar_t>( | ||
| 371 | + (static_cast<unsigned char>(u16.at(i)) << 8) + | ||
| 372 | + static_cast<unsigned char>(u16.at(i+1))); | ||
| 373 | + } | ||
| 374 | + PointerHolder<wchar_t> wmodep(true, new wchar_t(strlen(mode) + 1)); | ||
| 375 | + wchar_t* wmode = wmodep.getPointer(); | ||
| 376 | + wmode[strlen(mode)] = 0; | ||
| 377 | + for (size_t i = 0; i < strlen(mode); ++i) | ||
| 378 | + { | ||
| 379 | + wmode[i] = mode[i]; | ||
| 380 | + } | ||
| 381 | + | ||
| 357 | #ifdef _MSC_VER | 382 | #ifdef _MSC_VER |
| 358 | - errno_t err = fopen_s(&f, filename, mode); | 383 | + errno_t err = _wfopen_s(&f, wfilename, wmode); |
| 359 | if (err != 0) | 384 | if (err != 0) |
| 360 | { | 385 | { |
| 361 | errno = err; | 386 | errno = err; |
| 387 | + } | ||
| 388 | +#else | ||
| 389 | + f = _wfopen(wfilename, wmode); | ||
| 390 | +#endif | ||
| 391 | + if (f == 0) | ||
| 392 | + { | ||
| 362 | throw_system_error(std::string("open ") + filename); | 393 | throw_system_error(std::string("open ") + filename); |
| 363 | } | 394 | } |
| 364 | #else | 395 | #else |
qpdf/qtest/qpdf.test
| @@ -135,7 +135,7 @@ foreach my $c (@completion_tests) | @@ -135,7 +135,7 @@ foreach my $c (@completion_tests) | ||
| 135 | show_ntests(); | 135 | show_ntests(); |
| 136 | # ---------- | 136 | # ---------- |
| 137 | $td->notify("--- Argument Parsing ---"); | 137 | $td->notify("--- Argument Parsing ---"); |
| 138 | -$n_tests += 6; | 138 | +$n_tests += 8; |
| 139 | 139 | ||
| 140 | $td->runtest("required argument", | 140 | $td->runtest("required argument", |
| 141 | {$td->COMMAND => "qpdf --password minimal.pdf"}, | 141 | {$td->COMMAND => "qpdf --password minimal.pdf"}, |
| @@ -167,6 +167,16 @@ $td->runtest("extra overlay filename", | @@ -167,6 +167,16 @@ $td->runtest("extra overlay filename", | ||
| 167 | {$td->REGEXP => ".*overlay file already specified.*", | 167 | {$td->REGEXP => ".*overlay file already specified.*", |
| 168 | $td->EXIT_STATUS => 2}, | 168 | $td->EXIT_STATUS => 2}, |
| 169 | $td->NORMALIZE_NEWLINES); | 169 | $td->NORMALIZE_NEWLINES); |
| 170 | +foreach my $d (['auto-ü', 1], ['auto-öπ', 2]) | ||
| 171 | +{ | ||
| 172 | + my ($u, $n) = @$d; | ||
| 173 | + copy('minimal.pdf', "$u.pdf"); | ||
| 174 | + $td->runtest("unicode filename $u", | ||
| 175 | + {$td->COMMAND => "qpdf --check $u.pdf"}, | ||
| 176 | + {$td->FILE => "check-unicode-filename-$n.out", | ||
| 177 | + $td->EXIT_STATUS => 0}, | ||
| 178 | + $td->NORMALIZE_NEWLINES); | ||
| 179 | +} | ||
| 170 | 180 | ||
| 171 | show_ntests(); | 181 | show_ntests(); |
| 172 | # ---------- | 182 | # ---------- |
| @@ -4093,5 +4103,5 @@ sub get_md5_checksum | @@ -4093,5 +4103,5 @@ sub get_md5_checksum | ||
| 4093 | sub cleanup | 4103 | sub cleanup |
| 4094 | { | 4104 | { |
| 4095 | system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache"); | 4105 | system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache"); |
| 4096 | - system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf"); | 4106 | + system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf auto-*.pdf"); |
| 4097 | } | 4107 | } |
qpdf/qtest/qpdf/check-unicode-filename-1.out
0 → 100644