Commit 2d6512eb3e7fd508de452b5bfa75dd7e224142da

Authored by m-holger
Committed by GitHub
2 parents 08d49887 a367a367

Merge pull request #1575 from m-holger/string

Add new private-API class String
include/qpdf/ObjectHandle.hh
@@ -105,6 +105,9 @@ namespace qpdf @@ -105,6 +105,9 @@ namespace qpdf
105 void warn(QPDFExc&&) const; 105 void warn(QPDFExc&&) const;
106 void warn(std::string const& warning) const; 106 void warn(std::string const& warning) const;
107 107
  108 + inline std::shared_ptr<QPDFObject> const& obj_sp() const;
  109 + inline QPDFObjectHandle oh() const;
  110 +
108 protected: 111 protected:
109 BaseHandle() = default; 112 BaseHandle() = default;
110 BaseHandle(std::shared_ptr<QPDFObject> const& obj) : 113 BaseHandle(std::shared_ptr<QPDFObject> const& obj) :
include/qpdf/QPDFObjectHandle.hh
@@ -1321,21 +1321,6 @@ class QPDFObjectHandle: public qpdf::BaseHandle @@ -1321,21 +1321,6 @@ class QPDFObjectHandle: public qpdf::BaseHandle
1321 { 1321 {
1322 return obj; 1322 return obj;
1323 } 1323 }
1324 - std::shared_ptr<QPDFObject>  
1325 - getObj() const  
1326 - {  
1327 - return obj;  
1328 - }  
1329 - QPDFObject*  
1330 - getObjectPtr()  
1331 - {  
1332 - return obj.get();  
1333 - }  
1334 - QPDFObject* const  
1335 - getObjectPtr() const  
1336 - {  
1337 - return obj.get();  
1338 - }  
1339 1324
1340 void writeJSON(int json_version, JSON::Writer& p, bool dereference_indirect = false) const; 1325 void writeJSON(int json_version, JSON::Writer& p, bool dereference_indirect = false) const;
1341 1326
libqpdf/QPDFFormFieldObjectHelper.cc
@@ -765,7 +765,7 @@ QPDFFormFieldObjectHelper::generateTextAppearance(QPDFAnnotationObjectHelper& ao @@ -765,7 +765,7 @@ QPDFFormFieldObjectHelper::generateTextAppearance(QPDFAnnotationObjectHelper& ao
765 return; 765 return;
766 } 766 }
767 767
768 - if (AS.getObj().use_count() > 4) { 768 + if (AS.obj_sp().use_count() > 3) {
769 aoh.warn("unable to generate text appearance from shared appearance stream for update"); 769 aoh.warn("unable to generate text appearance from shared appearance stream for update");
770 return; 770 return;
771 } 771 }
libqpdf/QPDFObjectHandle.cc
@@ -318,7 +318,7 @@ BaseHandle::copy(bool shallow) const @@ -318,7 +318,7 @@ BaseHandle::copy(bool shallow) const
318 throw std::logic_error("attempted to shallow copy QPDFObjectHandle from destroyed QPDF"); 318 throw std::logic_error("attempted to shallow copy QPDFObjectHandle from destroyed QPDF");
319 return {}; // does not return 319 return {}; // does not return
320 case ::ot_reference: 320 case ::ot_reference:
321 - return obj->qpdf->getObject(obj->og).getObj(); 321 + return obj->qpdf->getObject(obj->og).obj_sp();
322 } 322 }
323 return {}; // unreachable 323 return {}; // unreachable
324 } 324 }
@@ -470,7 +470,7 @@ BaseHandle::write_json(int json_version, JSON::Writer& p) const @@ -470,7 +470,7 @@ BaseHandle::write_json(int json_version, JSON::Writer& p) const
470 p.writeNext() << "null"; 470 p.writeNext() << "null";
471 } 471 }
472 p.writeNext(); 472 p.writeNext();
473 - auto item_og = value.getObj()->getObjGen(); 473 + auto item_og = value.id_gen();
474 if (item_og.isIndirect()) { 474 if (item_og.isIndirect()) {
475 p << "\"" << item_og.unparse(' ') << " R\""; 475 p << "\"" << item_og.unparse(' ') << " R\"";
476 } else { 476 } else {
@@ -999,50 +999,110 @@ QPDFObjectHandle::getValueAsName(std::string& value) const @@ -999,50 +999,110 @@ QPDFObjectHandle::getValueAsName(std::string& value) const
999 return true; 999 return true;
1000 } 1000 }
1001 1001
1002 -// String accessors 1002 +// String methods
  1003 +
  1004 +QPDFObjectHandle
  1005 +QPDFObjectHandle::newString(std::string const& str)
  1006 +{
  1007 + return {QPDFObject::create<QPDF_String>(str)};
  1008 +}
  1009 +
  1010 +QPDFObjectHandle
  1011 +QPDFObjectHandle::newUnicodeString(std::string const& utf8_str)
  1012 +{
  1013 + return {String::utf16(utf8_str).obj_sp()};
  1014 +}
  1015 +
  1016 +String::String(std::string const& str) :
  1017 + BaseHandle(QPDFObject::create<QPDF_String>(str))
  1018 +{
  1019 +}
  1020 +
  1021 +String::String(std::string&& str) :
  1022 + BaseHandle(QPDFObject::create<QPDF_String>(std::move(str)))
  1023 +{
  1024 +}
  1025 +
  1026 +String
  1027 +String::utf16(std::string const& utf8_str)
  1028 +{
  1029 + std::string result;
  1030 + if (QUtil::utf8_to_pdf_doc(utf8_str, result, '?')) {
  1031 + return String(result);
  1032 + }
  1033 + return String(QUtil::utf8_to_utf16(utf8_str));
  1034 +}
  1035 +
  1036 +std::string const&
  1037 +String::value() const
  1038 +{
  1039 + auto* s = as<QPDF_String>();
  1040 + if (!s) {
  1041 + throw invalid_error("String");
  1042 + }
  1043 + return s->val;
  1044 +}
  1045 +
  1046 +std::string
  1047 +String::utf8_value() const
  1048 +{
  1049 + auto* s = as<QPDF_String>();
  1050 + if (!s) {
  1051 + throw invalid_error("String");
  1052 + }
  1053 + if (util::is_utf16(s->val)) {
  1054 + return QUtil::utf16_to_utf8(s->val);
  1055 + }
  1056 + if (util::is_explicit_utf8(s->val)) {
  1057 + // PDF 2.0 allows UTF-8 strings when explicitly prefixed with the three-byte representation
  1058 + // of U+FEFF.
  1059 + return s->val.substr(3);
  1060 + }
  1061 + return QUtil::pdf_doc_to_utf8(s->val);
  1062 +}
1003 1063
1004 std::string 1064 std::string
1005 QPDFObjectHandle::getStringValue() const 1065 QPDFObjectHandle::getStringValue() const
1006 { 1066 {
1007 - if (isString()) {  
1008 - return obj->getStringValue();  
1009 - } else { 1067 + try {
  1068 + return String(obj).value();
  1069 + } catch (std::invalid_argument&) {
1010 typeWarning("string", "returning empty string"); 1070 typeWarning("string", "returning empty string");
1011 - QTC::TC("qpdf", "QPDFObjectHandle string returning empty string");  
1012 - return ""; 1071 + return {};
1013 } 1072 }
1014 } 1073 }
1015 1074
1016 bool 1075 bool
1017 QPDFObjectHandle::getValueAsString(std::string& value) const 1076 QPDFObjectHandle::getValueAsString(std::string& value) const
1018 { 1077 {
1019 - if (!isString()) { 1078 + try {
  1079 + value = String(obj).value();
  1080 + return true;
  1081 + } catch (std::invalid_argument&) {
1020 return false; 1082 return false;
1021 } 1083 }
1022 - value = obj->getStringValue();  
1023 - return true;  
1024 } 1084 }
1025 1085
1026 std::string 1086 std::string
1027 QPDFObjectHandle::getUTF8Value() const 1087 QPDFObjectHandle::getUTF8Value() const
1028 { 1088 {
1029 - if (auto str = as<QPDF_String>()) {  
1030 - return str->getUTF8Val();  
1031 - } else { 1089 + try {
  1090 + return String(obj).utf8_value();
  1091 + } catch (std::invalid_argument&) {
1032 typeWarning("string", "returning empty string"); 1092 typeWarning("string", "returning empty string");
1033 - QTC::TC("qpdf", "QPDFObjectHandle string returning empty utf8");  
1034 - return ""; 1093 + return {};
1035 } 1094 }
1036 } 1095 }
1037 1096
1038 bool 1097 bool
1039 QPDFObjectHandle::getValueAsUTF8(std::string& value) const 1098 QPDFObjectHandle::getValueAsUTF8(std::string& value) const
1040 { 1099 {
1041 - if (auto str = as<QPDF_String>()) {  
1042 - value = str->getUTF8Val(); 1100 + try {
  1101 + value = String(obj).utf8_value();
1043 return true; 1102 return true;
  1103 + } catch (std::invalid_argument&) {
  1104 + return false;
1044 } 1105 }
1045 - return false;  
1046 } 1106 }
1047 1107
1048 // Operator and Inline Image accessors 1108 // Operator and Inline Image accessors
@@ -1718,18 +1778,6 @@ QPDFObjectHandle::newReal(double value, int decimal_places, bool trim_trailing_z @@ -1718,18 +1778,6 @@ QPDFObjectHandle::newReal(double value, int decimal_places, bool trim_trailing_z
1718 } 1778 }
1719 1779
1720 QPDFObjectHandle 1780 QPDFObjectHandle
1721 -QPDFObjectHandle::newString(std::string const& str)  
1722 -{  
1723 - return {QPDFObject::create<QPDF_String>(str)};  
1724 -}  
1725 -  
1726 -QPDFObjectHandle  
1727 -QPDFObjectHandle::newUnicodeString(std::string const& utf8_str)  
1728 -{  
1729 - return {QPDF_String::create_utf16(utf8_str)};  
1730 -}  
1731 -  
1732 -QPDFObjectHandle  
1733 QPDFObjectHandle::newOperator(std::string const& value) 1781 QPDFObjectHandle::newOperator(std::string const& value)
1734 { 1782 {
1735 return {QPDFObject::create<QPDF_Operator>(value)}; 1783 return {QPDFObject::create<QPDF_Operator>(value)};
libqpdf/QPDFParser.cc
@@ -626,8 +626,8 @@ QPDFParser::fixMissingKeys() @@ -626,8 +626,8 @@ QPDFParser::fixMissingKeys()
626 { 626 {
627 std::set<std::string> names; 627 std::set<std::string> names;
628 for (auto& obj: frame->olist) { 628 for (auto& obj: frame->olist) {
629 - if (obj.getObj()->getTypeCode() == ::ot_name) {  
630 - names.insert(obj.getObj()->getStringValue()); 629 + if (obj.raw_type_code() == ::ot_name) {
  630 + names.insert(obj.obj_sp()->getStringValue());
631 } 631 }
632 } 632 }
633 int next_fake_key = 1; 633 int next_fake_key = 1;
libqpdf/QPDF_String.cc
@@ -2,6 +2,7 @@ @@ -2,6 +2,7 @@
2 2
3 #include <qpdf/QPDFObjectHandle_private.hh> 3 #include <qpdf/QPDFObjectHandle_private.hh>
4 #include <qpdf/QUtil.hh> 4 #include <qpdf/QUtil.hh>
  5 +#include <qpdf/Util.hh>
5 6
6 // DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of 7 // DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of
7 // including it in case it may accidentally be used. 8 // including it in case it may accidentally be used.
@@ -9,40 +10,46 @@ @@ -9,40 +10,46 @@
9 static bool 10 static bool
10 is_iso_latin1_printable(char ch) 11 is_iso_latin1_printable(char ch)
11 { 12 {
12 - return (((ch >= 32) && (ch <= 126)) || (static_cast<unsigned char>(ch) >= 160));  
13 -}  
14 -  
15 -std::shared_ptr<QPDFObject>  
16 -QPDF_String::create_utf16(std::string const& utf8_val)  
17 -{  
18 - std::string result;  
19 - if (!QUtil::utf8_to_pdf_doc(utf8_val, result, '?')) {  
20 - result = QUtil::utf8_to_utf16(utf8_val);  
21 - }  
22 - return QPDFObject::create<QPDF_String>(result); 13 + return (ch >= 32 && ch <= 126) || static_cast<unsigned char>(ch) >= 160;
23 } 14 }
24 15
25 void 16 void
26 QPDF_String::writeJSON(int json_version, JSON::Writer& p) 17 QPDF_String::writeJSON(int json_version, JSON::Writer& p)
27 { 18 {
28 - auto candidate = getUTF8Val();  
29 if (json_version == 1) { 19 if (json_version == 1) {
30 - p << "\"" << JSON::Writer::encode_string(candidate) << "\"";  
31 - } else {  
32 - // See if we can unambiguously represent as Unicode.  
33 - if (QUtil::is_utf16(val) || QUtil::is_explicit_utf8(val)) { 20 + if (util::is_utf16(val)) {
  21 + p << "\"" << JSON::Writer::encode_string(QUtil::utf16_to_utf8(val)) << "\"";
  22 + return;
  23 + }
  24 + if (util::is_explicit_utf8(val)) {
  25 + // PDF 2.0 allows UTF-8 strings when explicitly prefixed with the three-byte
  26 + // representation of U+FEFF.
  27 + p << "\"" << JSON::Writer::encode_string(val.substr(3)) << "\"";
  28 + return;
  29 + }
  30 + p << "\"" << JSON::Writer::encode_string(QUtil::pdf_doc_to_utf8(val)) << "\"";
  31 + return;
  32 + }
  33 + // See if we can unambiguously represent as Unicode.
  34 + if (util::is_utf16(val)) {
  35 + p << "\"u:" << JSON::Writer::encode_string(QUtil::utf16_to_utf8(val)) << "\"";
  36 + return;
  37 + }
  38 + // See if we can unambiguously represent as Unicode.
  39 + if (util::is_explicit_utf8(val)) {
  40 + p << "\"u:" << JSON::Writer::encode_string(val.substr(3)) << "\"";
  41 + return;
  42 + }
  43 + if (!useHexString()) {
  44 + auto candidate = QUtil::pdf_doc_to_utf8(val);
  45 + std::string test;
  46 + if (QUtil::utf8_to_pdf_doc(candidate, test, '?') && test == val) {
  47 + // This is a PDF-doc string that can be losslessly encoded as Unicode.
34 p << "\"u:" << JSON::Writer::encode_string(candidate) << "\""; 48 p << "\"u:" << JSON::Writer::encode_string(candidate) << "\"";
35 return; 49 return;
36 - } else if (!useHexString()) {  
37 - std::string test;  
38 - if (QUtil::utf8_to_pdf_doc(candidate, test, '?') && (test == val)) {  
39 - // This is a PDF-doc string that can be losslessly encoded as Unicode.  
40 - p << "\"u:" << JSON::Writer::encode_string(candidate) << "\"";  
41 - return;  
42 - }  
43 } 50 }
44 - p << "\"b:" << QUtil::hex_encode(val) << "\"";  
45 } 51 }
  52 + p << "\"b:" << QUtil::hex_encode(val) << "\"";
46 } 53 }
47 54
48 bool 55 bool
@@ -133,17 +140,3 @@ QPDF_String::unparse(bool force_binary) @@ -133,17 +140,3 @@ QPDF_String::unparse(bool force_binary)
133 140
134 return result; 141 return result;
135 } 142 }
136 -  
137 -std::string  
138 -QPDF_String::getUTF8Val() const  
139 -{  
140 - if (QUtil::is_utf16(val)) {  
141 - return QUtil::utf16_to_utf8(val);  
142 - } else if (QUtil::is_explicit_utf8(val)) {  
143 - // PDF 2.0 allows UTF-8 strings when explicitly prefixed with the three-byte representation  
144 - // of U+FEFF.  
145 - return val.substr(3);  
146 - } else {  
147 - return QUtil::pdf_doc_to_utf8(val);  
148 - }  
149 -}  
libqpdf/QPDF_json.cc
@@ -693,7 +693,7 @@ QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value) @@ -693,7 +693,7 @@ QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value)
693 QPDFObject::JSON_Descr(j_descr.input, cur_object)); 693 QPDFObject::JSON_Descr(j_descr.input, cur_object));
694 } 694 }
695 695
696 - oh.getObjectPtr()->setDescription(&pdf, descr, value.getStart()); 696 + oh.obj_sp()->setDescription(&pdf, descr, value.getStart());
697 } 697 }
698 698
699 QPDFObjectHandle 699 QPDFObjectHandle
libqpdf/QPDF_objects.cc
@@ -1554,7 +1554,7 @@ Objects::readObjectAtOffset( @@ -1554,7 +1554,7 @@ Objects::readObjectAtOffset(
1554 break; 1554 break;
1555 } 1555 }
1556 } 1556 }
1557 - m->objects.updateCache(og, oh.getObj(), end_before_space, m->file->tell()); 1557 + m->objects.updateCache(og, oh.obj_sp(), end_before_space, m->file->tell());
1558 } 1558 }
1559 1559
1560 QPDFObjectHandle 1560 QPDFObjectHandle
@@ -1613,7 +1613,7 @@ Objects::readObjectAtOffset( @@ -1613,7 +1613,7 @@ Objects::readObjectAtOffset(
1613 break; 1613 break;
1614 } 1614 }
1615 } 1615 }
1616 - m->objects.updateCache(og, oh.getObj(), end_before_space, m->file->tell()); 1616 + m->objects.updateCache(og, oh.obj_sp(), end_before_space, m->file->tell());
1617 1617
1618 return oh; 1618 return oh;
1619 } 1619 }
@@ -1805,7 +1805,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) @@ -1805,7 +1805,7 @@ Objects::resolveObjectsInStream(int obj_stream_number)
1805 entry->second.getObjStreamNumber() == obj_stream_number) { 1805 entry->second.getObjStreamNumber() == obj_stream_number) {
1806 is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset); 1806 is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset);
1807 auto oh = readObjectInStream(in, obj_stream_number, obj_id); 1807 auto oh = readObjectInStream(in, obj_stream_number, obj_id);
1808 - updateCache(og, oh.getObj(), end_before_space, end_after_space); 1808 + updateCache(og, oh.obj_sp(), end_before_space, end_after_space);
1809 } else { 1809 } else {
1810 QTC::TC("qpdf", "QPDF not caching overridden objstm object"); 1810 QTC::TC("qpdf", "QPDF not caching overridden objstm object");
1811 } 1811 }
@@ -1874,7 +1874,7 @@ QPDF::makeIndirectObject(QPDFObjectHandle oh) @@ -1874,7 +1874,7 @@ QPDF::makeIndirectObject(QPDFObjectHandle oh)
1874 if (!oh) { 1874 if (!oh) {
1875 throw std::logic_error("attempted to make an uninitialized QPDFObjectHandle indirect"); 1875 throw std::logic_error("attempted to make an uninitialized QPDFObjectHandle indirect");
1876 } 1876 }
1877 - return m->objects.makeIndirectFromQPDFObject(oh.getObj()); 1877 + return m->objects.makeIndirectFromQPDFObject(oh.obj_sp());
1878 } 1878 }
1879 1879
1880 std::shared_ptr<QPDFObject> 1880 std::shared_ptr<QPDFObject>
@@ -1935,7 +1935,7 @@ QPDF::replaceObject(QPDFObjGen og, QPDFObjectHandle oh) @@ -1935,7 +1935,7 @@ QPDF::replaceObject(QPDFObjGen og, QPDFObjectHandle oh)
1935 if (!oh || (oh.isIndirect() && !(oh.isStream() && oh.getObjGen() == og))) { 1935 if (!oh || (oh.isIndirect() && !(oh.isStream() && oh.getObjGen() == og))) {
1936 throw std::logic_error("QPDF::replaceObject called with indirect object handle"); 1936 throw std::logic_error("QPDF::replaceObject called with indirect object handle");
1937 } 1937 }
1938 - m->objects.updateCache(og, oh.getObj(), -1, -1, false); 1938 + m->objects.updateCache(og, oh.obj_sp(), -1, -1, false);
1939 } 1939 }
1940 1940
1941 void 1941 void
libqpdf/QUtil.cc
@@ -1688,19 +1688,13 @@ QUtil::utf8_to_pdf_doc(std::string const& utf8, std::string& pdfdoc, char unknow @@ -1688,19 +1688,13 @@ QUtil::utf8_to_pdf_doc(std::string const& utf8, std::string& pdfdoc, char unknow
1688 bool 1688 bool
1689 QUtil::is_utf16(std::string const& val) 1689 QUtil::is_utf16(std::string const& val)
1690 { 1690 {
1691 - return (  
1692 - (val.length() >= 2) &&  
1693 - (((val.at(0) == '\xfe') && (val.at(1) == '\xff')) ||  
1694 - ((val.at(0) == '\xff') && (val.at(1) == '\xfe')))); 1691 + return util::is_utf16(val);
1695 } 1692 }
1696 1693
1697 bool 1694 bool
1698 QUtil::is_explicit_utf8(std::string const& val) 1695 QUtil::is_explicit_utf8(std::string const& val)
1699 { 1696 {
1700 - // QPDF_String.cc knows that this is a 3-byte sequence.  
1701 - return (  
1702 - (val.length() >= 3) && (val.at(0) == '\xef') && (val.at(1) == '\xbb') &&  
1703 - (val.at(2) == '\xbf')); 1697 + return util::is_explicit_utf8(val);
1704 } 1698 }
1705 1699
1706 std::string 1700 std::string
libqpdf/qpdf/QPDFObjectHandle_private.hh
@@ -412,15 +412,15 @@ namespace qpdf @@ -412,15 +412,15 @@ namespace qpdf
412 { 412 {
413 } 413 }
414 414
415 - // Return the name value. If the object is not a valid Name, throw a  
416 - // std::invalid_argument exception. 415 + // Return the name value. If the object is not a valid Name, throw a std::invalid_argument
  416 + // exception.
417 operator std::string() const& 417 operator std::string() const&
418 { 418 {
419 return value(); 419 return value();
420 } 420 }
421 421
422 - // Return the integer value. If the object is not a valid integer, throw a  
423 - // std::invalid_argument exception. 422 + // Return the name value. If the object is not a valid name, throw a std::invalid_argument
  423 + // exception.
424 std::string const& value() const; 424 std::string const& value() const;
425 425
426 // Return true if object value is equal to the 'rhs' value. Return false if the object is 426 // Return true if object value is equal to the 'rhs' value. Return false if the object is
@@ -589,7 +589,56 @@ namespace qpdf @@ -589,7 +589,56 @@ namespace qpdf
589 void warn(std::string const& message); 589 void warn(std::string const& message);
590 590
591 static std::map<std::string, std::string> filter_abbreviations; 591 static std::map<std::string, std::string> filter_abbreviations;
592 - }; 592 + }; // class Stream
  593 +
  594 + class String final: public BaseHandle
  595 + {
  596 + public:
  597 + String() = default;
  598 + String(String const&) = default;
  599 + String(String&&) = default;
  600 + String& operator=(String const&) = default;
  601 + String& operator=(String&&) = default;
  602 + ~String() = default;
  603 +
  604 + explicit String(std::string const&);
  605 + explicit String(std::string&&);
  606 +
  607 + String(QPDFObjectHandle const& oh) :
  608 + BaseHandle(oh.type_code() == ::ot_string ? oh : QPDFObjectHandle())
  609 + {
  610 + }
  611 +
  612 + String(QPDFObjectHandle&& oh) :
  613 + BaseHandle(oh.type_code() == ::ot_string ? std::move(oh) : QPDFObjectHandle())
  614 + {
  615 + }
  616 +
  617 + static String utf16(std::string const&);
  618 +
  619 + // Return the string value. If the object is not a valid string, throw a
  620 + // std::invalid_argument exception.
  621 + operator std::string() const&
  622 + {
  623 + return value();
  624 + }
  625 +
  626 + // Return the string value. If the object is not a valid string, throw a
  627 + // std::invalid_argument exception.
  628 + std::string const& value() const;
  629 +
  630 + // Return the string value. If the object is not a valid string, throw a
  631 + // std::invalid_argument exception.
  632 + std::string utf8_value() const;
  633 +
  634 + // Return true if object value is equal to the 'rhs' value. Return false if the object is
  635 + // not a valid String.
  636 + friend bool
  637 + operator==(String const& lhs, std::string_view rhs)
  638 + {
  639 + return lhs && lhs.value() == rhs;
  640 + }
  641 + }; // class String
593 642
594 template <typename T> 643 template <typename T>
595 T* 644 T*
@@ -621,6 +670,18 @@ namespace qpdf @@ -621,6 +670,18 @@ namespace qpdf
621 { 670 {
622 } 671 }
623 672
  673 + inline std::shared_ptr<QPDFObject> const&
  674 + BaseHandle::obj_sp() const
  675 + {
  676 + return obj;
  677 + }
  678 +
  679 + inline QPDFObjectHandle
  680 + BaseHandle::oh() const
  681 + {
  682 + return {obj};
  683 + }
  684 +
624 inline void 685 inline void
625 BaseHandle::assign(qpdf_object_type_e required, BaseHandle const& other) 686 BaseHandle::assign(qpdf_object_type_e required, BaseHandle const& other)
626 { 687 {
libqpdf/qpdf/QPDFObject_private.hh
@@ -30,6 +30,7 @@ namespace qpdf @@ -30,6 +30,7 @@ namespace qpdf
30 class Integer; 30 class Integer;
31 class Name; 31 class Name;
32 class Stream; 32 class Stream;
  33 + class String;
33 34
34 namespace impl 35 namespace impl
35 { 36 {
@@ -261,20 +262,24 @@ class QPDF_String final @@ -261,20 +262,24 @@ class QPDF_String final
261 { 262 {
262 friend class QPDFObject; 263 friend class QPDFObject;
263 friend class qpdf::BaseHandle; 264 friend class qpdf::BaseHandle;
  265 + friend class qpdf::String;
264 friend class qpdf::impl::Writer; 266 friend class qpdf::impl::Writer;
265 267
266 public: 268 public:
267 - static std::shared_ptr<QPDFObject> create_utf16(std::string const& utf8_val);  
268 std::string unparse(bool force_binary = false); 269 std::string unparse(bool force_binary = false);
269 void writeJSON(int json_version, JSON::Writer& p); 270 void writeJSON(int json_version, JSON::Writer& p);
270 - std::string getUTF8Val() const;  
271 271
272 private: 272 private:
273 - QPDF_String(std::string val) : 273 + QPDF_String(std::string const& val) :
  274 + val(val)
  275 + {
  276 + }
  277 + QPDF_String(std::string&& val) :
274 val(std::move(val)) 278 val(std::move(val))
275 { 279 {
276 } 280 }
277 bool useHexString() const; 281 bool useHexString() const;
  282 +
278 std::string val; 283 std::string val;
279 }; 284 };
280 285
libqpdf/qpdf/Util.hh
@@ -74,6 +74,19 @@ namespace qpdf::util @@ -74,6 +74,19 @@ namespace qpdf::util
74 s.insert(0, 1, '1'); 74 s.insert(0, 1, '1');
75 } 75 }
76 76
  77 + inline bool
  78 + is_utf16(std::string const& str)
  79 + {
  80 + return str.starts_with("\xfe\xff") || str.starts_with("\xff\xfe");
  81 + }
  82 +
  83 + inline bool
  84 + is_explicit_utf8(std::string const& str)
  85 + {
  86 + // QPDF_String.cc knows that this is a 3-byte sequence.
  87 + return str.starts_with("\xef\xbb\xbf");
  88 + }
  89 +
77 std::string random_string(size_t len); 90 std::string random_string(size_t len);
78 91
79 } // namespace qpdf::util 92 } // namespace qpdf::util
libtests/qutil.cc
@@ -367,6 +367,16 @@ check_analyze(std::string const& str, bool has8bit, bool utf8, bool utf16) @@ -367,6 +367,16 @@ check_analyze(std::string const& str, bool has8bit, bool utf8, bool utf16)
367 } 367 }
368 368
369 void 369 void
  370 +explicit_utf8_test()
  371 +{
  372 + assert(QUtil::is_explicit_utf8("\xef\xbb\xbfnot empty"));
  373 + assert(QUtil::is_explicit_utf8("\xef\xbb\xbf"));
  374 + assert(!QUtil::is_explicit_utf8("\xef\xbb\xbenot explicit"));
  375 + assert(!QUtil::is_explicit_utf8("\xef\xbe\xbfnot explicit"));
  376 + assert(!QUtil::is_explicit_utf8("\xee\xbb\xbfnot explicit"));
  377 +}
  378 +
  379 +void
370 print_alternatives(std::string const& str) 380 print_alternatives(std::string const& str)
371 { 381 {
372 std::vector<std::string> result = QUtil::possible_repaired_encodings(str); 382 std::vector<std::string> result = QUtil::possible_repaired_encodings(str);
@@ -432,7 +442,7 @@ transcoding_test() @@ -432,7 +442,7 @@ transcoding_test()
432 std::string other_to_utf8; 442 std::string other_to_utf8;
433 assert(!QUtil::utf8_to_pdf_doc(other_utf8, other_to_utf8)); 443 assert(!QUtil::utf8_to_pdf_doc(other_utf8, other_to_utf8));
434 std::cout << other_to_utf8 << '\n'; 444 std::cout << other_to_utf8 << '\n';
435 - std::cout << "done other characters" << '\n'; 445 + std::cout << "done other characters\n";
436 // These valid UTF8 strings when converted to PDFDoc would end up 446 // These valid UTF8 strings when converted to PDFDoc would end up
437 // with a byte sequence that would be recognized as UTF-8 or 447 // with a byte sequence that would be recognized as UTF-8 or
438 // UTF-16 rather than PDFDoc. A special case is required to store 448 // UTF-16 rather than PDFDoc. A special case is required to store
@@ -747,6 +757,7 @@ main(int argc, char* argv[]) @@ -747,6 +757,7 @@ main(int argc, char* argv[])
747 getenv_test(); 757 getenv_test();
748 std::cout << "---- utf8" << '\n'; 758 std::cout << "---- utf8" << '\n';
749 to_utf8_test(); 759 to_utf8_test();
  760 + explicit_utf8_test();
750 std::cout << "---- utf16" << '\n'; 761 std::cout << "---- utf16" << '\n';
751 to_utf16_test(); 762 to_utf16_test();
752 std::cout << "---- utf8_to_ascii" << '\n'; 763 std::cout << "---- utf8_to_ascii" << '\n';
qpdf/qpdf.testcov
@@ -174,8 +174,6 @@ QPDFParser eof in parse 0 @@ -174,8 +174,6 @@ QPDFParser eof in parse 0
174 QPDFParser eof in parseRemainder 0 174 QPDFParser eof in parseRemainder 0
175 QPDFObjectHandle boolean returning false 0 175 QPDFObjectHandle boolean returning false 0
176 QPDFObjectHandle real returning 0.0 0 176 QPDFObjectHandle real returning 0.0 0
177 -QPDFObjectHandle string returning empty string 0  
178 -QPDFObjectHandle string returning empty utf8 0  
179 QPDFObjectHandle operator returning fake value 0 177 QPDFObjectHandle operator returning fake value 0
180 QPDFObjectHandle inlineimage returning empty data 0 178 QPDFObjectHandle inlineimage returning empty data 0
181 QPDFObjectHandle array treating as empty vector 0 179 QPDFObjectHandle array treating as empty vector 0
qpdf/qtest/json.test
@@ -38,7 +38,7 @@ my @json_files = ( @@ -38,7 +38,7 @@ my @json_files = (
38 ['page-labels-and-outlines', 38 ['page-labels-and-outlines',
39 ['--json-key=objects', '--json-object=trailer', '--json-object=2 0 R']], 39 ['--json-key=objects', '--json-object=trailer', '--json-object=2 0 R']],
40 ['field-types', ['--json-key=acroform']], 40 ['field-types', ['--json-key=acroform']],
41 - ['need-appearances', ['--json-key=acroform']], 41 + ['need-appearances-utf8', ['--json-key=acroform']],
42 ['V4-aes', ['--json-key=encrypt']], 42 ['V4-aes', ['--json-key=encrypt']],
43 ['V4-aes', ['--json-key=encrypt', '--show-encryption-key']], 43 ['V4-aes', ['--json-key=encrypt', '--show-encryption-key']],
44 ); 44 );
qpdf/qtest/qpdf/json-need-appearances-acroform-v1.out renamed to qpdf/qtest/qpdf/json-need-appearances-utf8-acroform-v1.out
@@ -27,7 +27,7 @@ @@ -27,7 +27,7 @@
27 "parent": null, 27 "parent": null,
28 "partialname": "text", 28 "partialname": "text",
29 "quadding": 0, 29 "quadding": 0,
30 - "value": "abc" 30 + "value": "abcde"
31 }, 31 },
32 { 32 {
33 "alternativename": "r1", 33 "alternativename": "r1",
qpdf/qtest/qpdf/json-need-appearances-acroform-v2.out renamed to qpdf/qtest/qpdf/json-need-appearances-utf8-acroform-v2.out
@@ -27,7 +27,7 @@ @@ -27,7 +27,7 @@
27 "parent": null, 27 "parent": null,
28 "partialname": "text", 28 "partialname": "text",
29 "quadding": 0, 29 "quadding": 0,
30 - "value": "u:abc" 30 + "value": "u:abcde"
31 }, 31 },
32 { 32 {
33 "alternativename": "r1", 33 "alternativename": "r1",
qpdf/qtest/qpdf/need-appearances-utf8.pdf 0 → 100644
No preview for this file type