Commit 011695dfdf52e7a83f0eeceb85d0d2c06e7df7da

Authored by Jay Berkenbilt
1 parent 4ccb2991

Support Unicode in filenames (fixes #298)

ChangeLog
1 2019-04-20 Jay Berkenbilt <ejb@ql.org> 1 2019-04-20 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Handle Unicode characters in filenames. The changes to support
  4 + Unicode on the CLI in Windows broke Unicode filenames. Fixes #298.
  5 +
3 * Slightly tighten logic that determines whether an object is a 6 * Slightly tighten logic that determines whether an object is a
4 page. The previous logic was sometimes failing to preserve 7 page. The previous logic was sometimes failing to preserve
5 annotations because they were passing the overly loose test for 8 annotations because they were passing the overly loose test for
libqpdf/QUtil.cc
@@ -354,11 +354,42 @@ FILE* @@ -354,11 +354,42 @@ FILE*
354 QUtil::safe_fopen(char const* filename, char const* mode) 354 QUtil::safe_fopen(char const* filename, char const* mode)
355 { 355 {
356 FILE* f = 0; 356 FILE* f = 0;
  357 +#ifdef _WIN32
  358 + // Convert the utf-8 encoded filename argument to wchar_t*. First,
  359 + // convert to utf16, then to wchar_t*. Note that u16 will start
  360 + // with the UTF16 marker, which we skip.
  361 + std::string u16 = utf8_to_utf16(filename);
  362 + size_t len = u16.length();
  363 + size_t wlen = (len / 2) - 1;
  364 + PointerHolder<wchar_t> wfilenamep(true, new wchar_t[wlen + 1]);
  365 + wchar_t* wfilename = wfilenamep.getPointer();
  366 + wfilename[wlen] = 0;
  367 + for (unsigned int i = 2; i < len; i += 2)
  368 + {
  369 + wfilename[(i/2) - 1] =
  370 + static_cast<wchar_t>(
  371 + (static_cast<unsigned char>(u16.at(i)) << 8) +
  372 + static_cast<unsigned char>(u16.at(i+1)));
  373 + }
  374 + PointerHolder<wchar_t> wmodep(true, new wchar_t(strlen(mode) + 1));
  375 + wchar_t* wmode = wmodep.getPointer();
  376 + wmode[strlen(mode)] = 0;
  377 + for (size_t i = 0; i < strlen(mode); ++i)
  378 + {
  379 + wmode[i] = mode[i];
  380 + }
  381 +
357 #ifdef _MSC_VER 382 #ifdef _MSC_VER
358 - errno_t err = fopen_s(&f, filename, mode); 383 + errno_t err = _wfopen_s(&f, wfilename, wmode);
359 if (err != 0) 384 if (err != 0)
360 { 385 {
361 errno = err; 386 errno = err;
  387 + }
  388 +#else
  389 + f = _wfopen(wfilename, wmode);
  390 +#endif
  391 + if (f == 0)
  392 + {
362 throw_system_error(std::string("open ") + filename); 393 throw_system_error(std::string("open ") + filename);
363 } 394 }
364 #else 395 #else
qpdf/qtest/qpdf.test
@@ -135,7 +135,7 @@ foreach my $c (@completion_tests) @@ -135,7 +135,7 @@ foreach my $c (@completion_tests)
135 show_ntests(); 135 show_ntests();
136 # ---------- 136 # ----------
137 $td->notify("--- Argument Parsing ---"); 137 $td->notify("--- Argument Parsing ---");
138 -$n_tests += 6; 138 +$n_tests += 8;
139 139
140 $td->runtest("required argument", 140 $td->runtest("required argument",
141 {$td->COMMAND => "qpdf --password minimal.pdf"}, 141 {$td->COMMAND => "qpdf --password minimal.pdf"},
@@ -167,6 +167,16 @@ $td->runtest("extra overlay filename", @@ -167,6 +167,16 @@ $td->runtest("extra overlay filename",
167 {$td->REGEXP => ".*overlay file already specified.*", 167 {$td->REGEXP => ".*overlay file already specified.*",
168 $td->EXIT_STATUS => 2}, 168 $td->EXIT_STATUS => 2},
169 $td->NORMALIZE_NEWLINES); 169 $td->NORMALIZE_NEWLINES);
  170 +foreach my $d (['auto-ü', 1], ['auto-öπ', 2])
  171 +{
  172 + my ($u, $n) = @$d;
  173 + copy('minimal.pdf', "$u.pdf");
  174 + $td->runtest("unicode filename $u",
  175 + {$td->COMMAND => "qpdf --check $u.pdf"},
  176 + {$td->FILE => "check-unicode-filename-$n.out",
  177 + $td->EXIT_STATUS => 0},
  178 + $td->NORMALIZE_NEWLINES);
  179 +}
170 180
171 show_ntests(); 181 show_ntests();
172 # ---------- 182 # ----------
@@ -4093,5 +4103,5 @@ sub get_md5_checksum @@ -4093,5 +4103,5 @@ sub get_md5_checksum
4093 sub cleanup 4103 sub cleanup
4094 { 4104 {
4095 system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache"); 4105 system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache");
4096 - system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf"); 4106 + system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf auto-*.pdf");
4097 } 4107 }
qpdf/qtest/qpdf/check-unicode-filename-1.out 0 → 100644
  1 +checking auto-ü.pdf
  2 +PDF Version: 1.3
  3 +File is not encrypted
  4 +File is not linearized
  5 +No syntax or stream encoding errors found; the file may still contain
  6 +errors that qpdf cannot detect
qpdf/qtest/qpdf/check-unicode-filename-2.out 0 → 100644
  1 +checking auto-öπ.pdf
  2 +PDF Version: 1.3
  3 +File is not encrypted
  4 +File is not linearized
  5 +No syntax or stream encoding errors found; the file may still contain
  6 +errors that qpdf cannot detect