Commit a70fbaaf50308cacc62bc9d7bb40ea75a4b9b54b

Authored by Jay Berkenbilt
1 parent b341d742

Honor other base encodings when generating appearances

ChangeLog
1 1 2019-01-05 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * When generating appearances, if the font uses one of the
  4 + standard, built-in encodings, restrict the character set to that
  5 + rather than just to ASCII. This will allow most appearances to
  6 + contain characters from the ISO-Latin-1 range plus a few
  7 + additional characters.
  8 +
3 9 * Add methods QUtil::utf8_to_win_ansi and
4 10 QUtil::utf8_to_mac_roman.
5 11  
... ...
include/qpdf/QPDFFormFieldObjectHelper.hh
... ... @@ -196,6 +196,8 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
196 196 void setRadioButtonValue(QPDFObjectHandle name);
197 197 void setCheckBoxValue(bool value);
198 198 void generateTextAppearance(QPDFAnnotationObjectHelper&);
  199 + QPDFObjectHandle getFontFromResource(
  200 + QPDFObjectHandle resources, std::string const& font_name);
199 201  
200 202 class Members
201 203 {
... ...
libqpdf/QPDFFormFieldObjectHelper.cc
... ... @@ -689,10 +689,13 @@ class TfFinder: public QPDFObjectHandle::TokenFilter
689 689 }
690 690 virtual void handleToken(QPDFTokenizer::Token const&);
691 691 double getTf();
  692 + std::string getFontName();
692 693  
693 694 private:
694 695 double tf;
  696 + std::string font_name;
695 697 double last_num;
  698 + std::string last_name;
696 699 };
697 700  
698 701 TfFinder::TfFinder() :
... ... @@ -713,6 +716,10 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token)
713 716 last_num = strtod(value.c_str(), 0);
714 717 break;
715 718  
  719 + case QPDFTokenizer::tt_name:
  720 + last_name = value;
  721 + break;
  722 +
716 723 case QPDFTokenizer::tt_word:
717 724 if ((value == "Tf") &&
718 725 (last_num > 1.0) &&
... ... @@ -722,6 +729,7 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token)
722 729 // insane things or suffering from over/underflow
723 730 tf = last_num;
724 731 }
  732 + font_name = last_name;
725 733 break;
726 734  
727 735 default:
... ... @@ -735,6 +743,26 @@ TfFinder::getTf()
735 743 return this->tf;
736 744 }
737 745  
  746 +std::string
  747 +TfFinder::getFontName()
  748 +{
  749 + return this->font_name;
  750 +}
  751 +
  752 +QPDFObjectHandle
  753 +QPDFFormFieldObjectHelper::getFontFromResource(
  754 + QPDFObjectHandle resources, std::string const& name)
  755 +{
  756 + QPDFObjectHandle result;
  757 + if (resources.isDictionary() &&
  758 + resources.getKey("/Font").isDictionary() &&
  759 + resources.getKey("/Font").hasKey(name))
  760 + {
  761 + result = resources.getKey("/Font").getKey(name);
  762 + }
  763 + return result;
  764 +}
  765 +
738 766 void
739 767 QPDFFormFieldObjectHelper::generateTextAppearance(
740 768 QPDFAnnotationObjectHelper& aoh)
... ... @@ -755,17 +783,52 @@ QPDFFormFieldObjectHelper::generateTextAppearance(
755 783 }
756 784 QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle();
757 785 std::string DA = getDefaultAppearance();
758   - std::string V = QUtil::utf8_to_ascii(getValueAsString());
  786 + std::string V = getValueAsString();
  787 + std::vector<std::string> opt;
  788 + if (isChoice() && ((getFlags() & ff_ch_combo) == 0))
  789 + {
  790 + opt = getChoices();
  791 + }
759 792  
760 793 TfFinder tff;
761 794 Pl_QPDFTokenizer tok("tf", &tff);
762 795 tok.write(QUtil::unsigned_char_pointer(DA.c_str()), DA.length());
763 796 tok.finish();
764 797 double tf = tff.getTf();
765   - std::vector<std::string> opt;
766   - if (isChoice() && ((getFlags() & ff_ch_combo) == 0))
  798 +
  799 + std::string (*encoder)(std::string const&, char) = &QUtil::utf8_to_ascii;
  800 + std::string font_name = tff.getFontName();
  801 + if (! font_name.empty())
767 802 {
768   - opt = getChoices();
  803 + // See if the font is encoded with something we know about.
  804 + QPDFObjectHandle resources = AS.getDict().getKey("/Resources");
  805 + QPDFObjectHandle font = getFontFromResource(resources, font_name);
  806 + if (! font.isInitialized())
  807 + {
  808 + QPDFObjectHandle dr = getInheritableFieldValue("/DR");
  809 + font = getFontFromResource(dr, font_name);
  810 + }
  811 + if (font.isDictionary() &&
  812 + font.getKey("/Encoding").isName())
  813 + {
  814 + std::string encoding = font.getKey("/Encoding").getName();
  815 + if (encoding == "/WinAnsiEncoding")
  816 + {
  817 + QTC::TC("qpdf", "QPDFFormFieldObjectHelper WinAnsi");
  818 + encoder = &QUtil::utf8_to_win_ansi;
  819 + }
  820 + else if (encoding == "/MacRomanEncoding")
  821 + {
  822 + encoder = &QUtil::utf8_to_mac_roman;
  823 + }
  824 + }
769 825 }
  826 +
  827 + V = (*encoder)(V, '?');
  828 + for (size_t i = 0; i < opt.size(); ++i)
  829 + {
  830 + opt.at(i) = (*encoder)(opt.at(i), '?');
  831 + }
  832 +
770 833 AS.addTokenFilter(new ValueSetter(DA, V, opt, tf, bbox));
771 834 }
... ...
manual/qpdf-manual.xml
... ... @@ -1422,8 +1422,9 @@ outfile.pdf</option>
1422 1422 <listitem>
1423 1423 <para>
1424 1424 For text fields and list boxes, any characters that fall
1425   - outside of US-ASCII will be replaced by the
1426   - <literal>?</literal> character.
  1425 + outside of US-ASCII or, if detected, &ldquo;Windows
  1426 + ANSI&rdquo; or &ldquo;Mac Roman&rdquo; encoding, will be
  1427 + replaced by the <literal>?</literal> character.
1427 1428 </para>
1428 1429 </listitem>
1429 1430 </itemizedlist>
... ...
qpdf/qpdf.cc
... ... @@ -1184,10 +1184,10 @@ ArgParser::argHelp()
1184 1184 << "fields in files like this. If you get this warning, you have two choices:\n"
1185 1185 << "either use qpdf's --generate-appearances flag to tell qpdf to go ahead and\n"
1186 1186 << "regenerate appearances, or use some other tool to generate the appearances.\n"
1187   - << "qpdf does a pretty good job with most forms when only ASCII characters are\n"
1188   - << "used in form field values, but if your form fields contain other\n"
1189   - << "characters, rich text, or are other than left justified, you will get\n"
1190   - << "better results first saving with other software.\n"
  1187 + << "qpdf does a pretty good job with most forms when only ASCII and \"Windows\n"
  1188 + << "ANSI\" characters are used in form field values, but if your form fields\n"
  1189 + << "contain other characters, rich text, or are other than left justified, you\n"
  1190 + << "will get better results first saving with other software.\n"
1191 1191 << "\n"
1192 1192 << "Version numbers may be expressed as major.minor.extension-level, so 1.7.3\n"
1193 1193 << "means PDF version 1.7 at extension level 3.\n"
... ...
qpdf/qpdf.testcov
... ... @@ -405,3 +405,4 @@ QPDF detected dangling ref 0
405 405 qpdf image optimize no pipeline 0
406 406 qpdf image optimize no shrink 0
407 407 qpdf image optimize too small 0
  408 +QPDFFormFieldObjectHelper WinAnsi 0
... ...
qpdf/qtest/qpdf/appearances-1.pdf
No preview for this file type
qpdf/qtest/qpdf/appearances-11.pdf
No preview for this file type
qpdf/qtest/qpdf/appearances-12.pdf
No preview for this file type
qpdf/qtest/qpdf/appearances-2.pdf
No preview for this file type
qpdf/qtest/qpdf/appearances-a.pdf
No preview for this file type
qpdf/qtest/qpdf/appearances-b.pdf
No preview for this file type
qpdf/qtest/qpdf/appearances-quack.pdf
No preview for this file type
qpdf/qtest/qpdf/json-need-appearances-acroform.out
... ... @@ -263,7 +263,7 @@
263 263 "parent": null,
264 264 "partialname": "text2",
265 265 "quadding": 0,
266   - "value": "salad πʬ"
  266 + "value": "salad ÷πʬ"
267 267 },
268 268 {
269 269 "alternativename": "combolist1",
... ...
qpdf/qtest/qpdf/more-choices.pdf
No preview for this file type
qpdf/qtest/qpdf/need-appearances-out.pdf
No preview for this file type
qpdf/qtest/qpdf/need-appearances.pdf
No preview for this file type