Commit 2d6512eb3e7fd508de452b5bfa75dd7e224142da
Committed by GitHub
Merge pull request #1575 from m-holger/string
Add new private-API class String
Showing 18 changed files with 4006 additions and 115 deletions
include/qpdf/ObjectHandle.hh
| ... | ... | @@ -105,6 +105,9 @@ namespace qpdf |
| 105 | 105 | void warn(QPDFExc&&) const; |
| 106 | 106 | void warn(std::string const& warning) const; |
| 107 | 107 | |
| 108 | + inline std::shared_ptr<QPDFObject> const& obj_sp() const; | |
| 109 | + inline QPDFObjectHandle oh() const; | |
| 110 | + | |
| 108 | 111 | protected: |
| 109 | 112 | BaseHandle() = default; |
| 110 | 113 | BaseHandle(std::shared_ptr<QPDFObject> const& obj) : | ... | ... |
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -1321,21 +1321,6 @@ class QPDFObjectHandle: public qpdf::BaseHandle |
| 1321 | 1321 | { |
| 1322 | 1322 | return obj; |
| 1323 | 1323 | } |
| 1324 | - std::shared_ptr<QPDFObject> | |
| 1325 | - getObj() const | |
| 1326 | - { | |
| 1327 | - return obj; | |
| 1328 | - } | |
| 1329 | - QPDFObject* | |
| 1330 | - getObjectPtr() | |
| 1331 | - { | |
| 1332 | - return obj.get(); | |
| 1333 | - } | |
| 1334 | - QPDFObject* const | |
| 1335 | - getObjectPtr() const | |
| 1336 | - { | |
| 1337 | - return obj.get(); | |
| 1338 | - } | |
| 1339 | 1324 | |
| 1340 | 1325 | void writeJSON(int json_version, JSON::Writer& p, bool dereference_indirect = false) const; |
| 1341 | 1326 | ... | ... |
libqpdf/QPDFFormFieldObjectHelper.cc
| ... | ... | @@ -765,7 +765,7 @@ QPDFFormFieldObjectHelper::generateTextAppearance(QPDFAnnotationObjectHelper& ao |
| 765 | 765 | return; |
| 766 | 766 | } |
| 767 | 767 | |
| 768 | - if (AS.getObj().use_count() > 4) { | |
| 768 | + if (AS.obj_sp().use_count() > 3) { | |
| 769 | 769 | aoh.warn("unable to generate text appearance from shared appearance stream for update"); |
| 770 | 770 | return; |
| 771 | 771 | } | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -318,7 +318,7 @@ BaseHandle::copy(bool shallow) const |
| 318 | 318 | throw std::logic_error("attempted to shallow copy QPDFObjectHandle from destroyed QPDF"); |
| 319 | 319 | return {}; // does not return |
| 320 | 320 | case ::ot_reference: |
| 321 | - return obj->qpdf->getObject(obj->og).getObj(); | |
| 321 | + return obj->qpdf->getObject(obj->og).obj_sp(); | |
| 322 | 322 | } |
| 323 | 323 | return {}; // unreachable |
| 324 | 324 | } |
| ... | ... | @@ -470,7 +470,7 @@ BaseHandle::write_json(int json_version, JSON::Writer& p) const |
| 470 | 470 | p.writeNext() << "null"; |
| 471 | 471 | } |
| 472 | 472 | p.writeNext(); |
| 473 | - auto item_og = value.getObj()->getObjGen(); | |
| 473 | + auto item_og = value.id_gen(); | |
| 474 | 474 | if (item_og.isIndirect()) { |
| 475 | 475 | p << "\"" << item_og.unparse(' ') << " R\""; |
| 476 | 476 | } else { |
| ... | ... | @@ -999,50 +999,110 @@ QPDFObjectHandle::getValueAsName(std::string& value) const |
| 999 | 999 | return true; |
| 1000 | 1000 | } |
| 1001 | 1001 | |
| 1002 | -// String accessors | |
| 1002 | +// String methods | |
| 1003 | + | |
| 1004 | +QPDFObjectHandle | |
| 1005 | +QPDFObjectHandle::newString(std::string const& str) | |
| 1006 | +{ | |
| 1007 | + return {QPDFObject::create<QPDF_String>(str)}; | |
| 1008 | +} | |
| 1009 | + | |
| 1010 | +QPDFObjectHandle | |
| 1011 | +QPDFObjectHandle::newUnicodeString(std::string const& utf8_str) | |
| 1012 | +{ | |
| 1013 | + return {String::utf16(utf8_str).obj_sp()}; | |
| 1014 | +} | |
| 1015 | + | |
| 1016 | +String::String(std::string const& str) : | |
| 1017 | + BaseHandle(QPDFObject::create<QPDF_String>(str)) | |
| 1018 | +{ | |
| 1019 | +} | |
| 1020 | + | |
| 1021 | +String::String(std::string&& str) : | |
| 1022 | + BaseHandle(QPDFObject::create<QPDF_String>(std::move(str))) | |
| 1023 | +{ | |
| 1024 | +} | |
| 1025 | + | |
| 1026 | +String | |
| 1027 | +String::utf16(std::string const& utf8_str) | |
| 1028 | +{ | |
| 1029 | + std::string result; | |
| 1030 | + if (QUtil::utf8_to_pdf_doc(utf8_str, result, '?')) { | |
| 1031 | + return String(result); | |
| 1032 | + } | |
| 1033 | + return String(QUtil::utf8_to_utf16(utf8_str)); | |
| 1034 | +} | |
| 1035 | + | |
| 1036 | +std::string const& | |
| 1037 | +String::value() const | |
| 1038 | +{ | |
| 1039 | + auto* s = as<QPDF_String>(); | |
| 1040 | + if (!s) { | |
| 1041 | + throw invalid_error("String"); | |
| 1042 | + } | |
| 1043 | + return s->val; | |
| 1044 | +} | |
| 1045 | + | |
| 1046 | +std::string | |
| 1047 | +String::utf8_value() const | |
| 1048 | +{ | |
| 1049 | + auto* s = as<QPDF_String>(); | |
| 1050 | + if (!s) { | |
| 1051 | + throw invalid_error("String"); | |
| 1052 | + } | |
| 1053 | + if (util::is_utf16(s->val)) { | |
| 1054 | + return QUtil::utf16_to_utf8(s->val); | |
| 1055 | + } | |
| 1056 | + if (util::is_explicit_utf8(s->val)) { | |
| 1057 | + // PDF 2.0 allows UTF-8 strings when explicitly prefixed with the three-byte representation | |
| 1058 | + // of U+FEFF. | |
| 1059 | + return s->val.substr(3); | |
| 1060 | + } | |
| 1061 | + return QUtil::pdf_doc_to_utf8(s->val); | |
| 1062 | +} | |
| 1003 | 1063 | |
| 1004 | 1064 | std::string |
| 1005 | 1065 | QPDFObjectHandle::getStringValue() const |
| 1006 | 1066 | { |
| 1007 | - if (isString()) { | |
| 1008 | - return obj->getStringValue(); | |
| 1009 | - } else { | |
| 1067 | + try { | |
| 1068 | + return String(obj).value(); | |
| 1069 | + } catch (std::invalid_argument&) { | |
| 1010 | 1070 | typeWarning("string", "returning empty string"); |
| 1011 | - QTC::TC("qpdf", "QPDFObjectHandle string returning empty string"); | |
| 1012 | - return ""; | |
| 1071 | + return {}; | |
| 1013 | 1072 | } |
| 1014 | 1073 | } |
| 1015 | 1074 | |
| 1016 | 1075 | bool |
| 1017 | 1076 | QPDFObjectHandle::getValueAsString(std::string& value) const |
| 1018 | 1077 | { |
| 1019 | - if (!isString()) { | |
| 1078 | + try { | |
| 1079 | + value = String(obj).value(); | |
| 1080 | + return true; | |
| 1081 | + } catch (std::invalid_argument&) { | |
| 1020 | 1082 | return false; |
| 1021 | 1083 | } |
| 1022 | - value = obj->getStringValue(); | |
| 1023 | - return true; | |
| 1024 | 1084 | } |
| 1025 | 1085 | |
| 1026 | 1086 | std::string |
| 1027 | 1087 | QPDFObjectHandle::getUTF8Value() const |
| 1028 | 1088 | { |
| 1029 | - if (auto str = as<QPDF_String>()) { | |
| 1030 | - return str->getUTF8Val(); | |
| 1031 | - } else { | |
| 1089 | + try { | |
| 1090 | + return String(obj).utf8_value(); | |
| 1091 | + } catch (std::invalid_argument&) { | |
| 1032 | 1092 | typeWarning("string", "returning empty string"); |
| 1033 | - QTC::TC("qpdf", "QPDFObjectHandle string returning empty utf8"); | |
| 1034 | - return ""; | |
| 1093 | + return {}; | |
| 1035 | 1094 | } |
| 1036 | 1095 | } |
| 1037 | 1096 | |
| 1038 | 1097 | bool |
| 1039 | 1098 | QPDFObjectHandle::getValueAsUTF8(std::string& value) const |
| 1040 | 1099 | { |
| 1041 | - if (auto str = as<QPDF_String>()) { | |
| 1042 | - value = str->getUTF8Val(); | |
| 1100 | + try { | |
| 1101 | + value = String(obj).utf8_value(); | |
| 1043 | 1102 | return true; |
| 1103 | + } catch (std::invalid_argument&) { | |
| 1104 | + return false; | |
| 1044 | 1105 | } |
| 1045 | - return false; | |
| 1046 | 1106 | } |
| 1047 | 1107 | |
| 1048 | 1108 | // Operator and Inline Image accessors |
| ... | ... | @@ -1718,18 +1778,6 @@ QPDFObjectHandle::newReal(double value, int decimal_places, bool trim_trailing_z |
| 1718 | 1778 | } |
| 1719 | 1779 | |
| 1720 | 1780 | QPDFObjectHandle |
| 1721 | -QPDFObjectHandle::newString(std::string const& str) | |
| 1722 | -{ | |
| 1723 | - return {QPDFObject::create<QPDF_String>(str)}; | |
| 1724 | -} | |
| 1725 | - | |
| 1726 | -QPDFObjectHandle | |
| 1727 | -QPDFObjectHandle::newUnicodeString(std::string const& utf8_str) | |
| 1728 | -{ | |
| 1729 | - return {QPDF_String::create_utf16(utf8_str)}; | |
| 1730 | -} | |
| 1731 | - | |
| 1732 | -QPDFObjectHandle | |
| 1733 | 1781 | QPDFObjectHandle::newOperator(std::string const& value) |
| 1734 | 1782 | { |
| 1735 | 1783 | return {QPDFObject::create<QPDF_Operator>(value)}; | ... | ... |
libqpdf/QPDFParser.cc
| ... | ... | @@ -626,8 +626,8 @@ QPDFParser::fixMissingKeys() |
| 626 | 626 | { |
| 627 | 627 | std::set<std::string> names; |
| 628 | 628 | for (auto& obj: frame->olist) { |
| 629 | - if (obj.getObj()->getTypeCode() == ::ot_name) { | |
| 630 | - names.insert(obj.getObj()->getStringValue()); | |
| 629 | + if (obj.raw_type_code() == ::ot_name) { | |
| 630 | + names.insert(obj.obj_sp()->getStringValue()); | |
| 631 | 631 | } |
| 632 | 632 | } |
| 633 | 633 | int next_fake_key = 1; | ... | ... |
libqpdf/QPDF_String.cc
| ... | ... | @@ -2,6 +2,7 @@ |
| 2 | 2 | |
| 3 | 3 | #include <qpdf/QPDFObjectHandle_private.hh> |
| 4 | 4 | #include <qpdf/QUtil.hh> |
| 5 | +#include <qpdf/Util.hh> | |
| 5 | 6 | |
| 6 | 7 | // DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of |
| 7 | 8 | // including it in case it may accidentally be used. |
| ... | ... | @@ -9,40 +10,46 @@ |
| 9 | 10 | static bool |
| 10 | 11 | is_iso_latin1_printable(char ch) |
| 11 | 12 | { |
| 12 | - return (((ch >= 32) && (ch <= 126)) || (static_cast<unsigned char>(ch) >= 160)); | |
| 13 | -} | |
| 14 | - | |
| 15 | -std::shared_ptr<QPDFObject> | |
| 16 | -QPDF_String::create_utf16(std::string const& utf8_val) | |
| 17 | -{ | |
| 18 | - std::string result; | |
| 19 | - if (!QUtil::utf8_to_pdf_doc(utf8_val, result, '?')) { | |
| 20 | - result = QUtil::utf8_to_utf16(utf8_val); | |
| 21 | - } | |
| 22 | - return QPDFObject::create<QPDF_String>(result); | |
| 13 | + return (ch >= 32 && ch <= 126) || static_cast<unsigned char>(ch) >= 160; | |
| 23 | 14 | } |
| 24 | 15 | |
| 25 | 16 | void |
| 26 | 17 | QPDF_String::writeJSON(int json_version, JSON::Writer& p) |
| 27 | 18 | { |
| 28 | - auto candidate = getUTF8Val(); | |
| 29 | 19 | if (json_version == 1) { |
| 30 | - p << "\"" << JSON::Writer::encode_string(candidate) << "\""; | |
| 31 | - } else { | |
| 32 | - // See if we can unambiguously represent as Unicode. | |
| 33 | - if (QUtil::is_utf16(val) || QUtil::is_explicit_utf8(val)) { | |
| 20 | + if (util::is_utf16(val)) { | |
| 21 | + p << "\"" << JSON::Writer::encode_string(QUtil::utf16_to_utf8(val)) << "\""; | |
| 22 | + return; | |
| 23 | + } | |
| 24 | + if (util::is_explicit_utf8(val)) { | |
| 25 | + // PDF 2.0 allows UTF-8 strings when explicitly prefixed with the three-byte | |
| 26 | + // representation of U+FEFF. | |
| 27 | + p << "\"" << JSON::Writer::encode_string(val.substr(3)) << "\""; | |
| 28 | + return; | |
| 29 | + } | |
| 30 | + p << "\"" << JSON::Writer::encode_string(QUtil::pdf_doc_to_utf8(val)) << "\""; | |
| 31 | + return; | |
| 32 | + } | |
| 33 | + // See if we can unambiguously represent as Unicode. | |
| 34 | + if (util::is_utf16(val)) { | |
| 35 | + p << "\"u:" << JSON::Writer::encode_string(QUtil::utf16_to_utf8(val)) << "\""; | |
| 36 | + return; | |
| 37 | + } | |
| 38 | + // Explicitly marked UTF-8 (PDF 2.0) can also be represented unambiguously as Unicode. | |
| 39 | + if (util::is_explicit_utf8(val)) { | |
| 40 | + p << "\"u:" << JSON::Writer::encode_string(val.substr(3)) << "\""; | |
| 41 | + return; | |
| 42 | + } | |
| 43 | + if (!useHexString()) { | |
| 44 | + auto candidate = QUtil::pdf_doc_to_utf8(val); | |
| 45 | + std::string test; | |
| 46 | + if (QUtil::utf8_to_pdf_doc(candidate, test, '?') && test == val) { | |
| 47 | + // This is a PDF-doc string that can be losslessly encoded as Unicode. | |
| 34 | 48 | p << "\"u:" << JSON::Writer::encode_string(candidate) << "\""; |
| 35 | 49 | return; |
| 36 | - } else if (!useHexString()) { | |
| 37 | - std::string test; | |
| 38 | - if (QUtil::utf8_to_pdf_doc(candidate, test, '?') && (test == val)) { | |
| 39 | - // This is a PDF-doc string that can be losslessly encoded as Unicode. | |
| 40 | - p << "\"u:" << JSON::Writer::encode_string(candidate) << "\""; | |
| 41 | - return; | |
| 42 | - } | |
| 43 | 50 | } |
| 44 | - p << "\"b:" << QUtil::hex_encode(val) << "\""; | |
| 45 | 51 | } |
| 52 | + p << "\"b:" << QUtil::hex_encode(val) << "\""; | |
| 46 | 53 | } |
| 47 | 54 | |
| 48 | 55 | bool |
| ... | ... | @@ -133,17 +140,3 @@ QPDF_String::unparse(bool force_binary) |
| 133 | 140 | |
| 134 | 141 | return result; |
| 135 | 142 | } |
| 136 | - | |
| 137 | -std::string | |
| 138 | -QPDF_String::getUTF8Val() const | |
| 139 | -{ | |
| 140 | - if (QUtil::is_utf16(val)) { | |
| 141 | - return QUtil::utf16_to_utf8(val); | |
| 142 | - } else if (QUtil::is_explicit_utf8(val)) { | |
| 143 | - // PDF 2.0 allows UTF-8 strings when explicitly prefixed with the three-byte representation | |
| 144 | - // of U+FEFF. | |
| 145 | - return val.substr(3); | |
| 146 | - } else { | |
| 147 | - return QUtil::pdf_doc_to_utf8(val); | |
| 148 | - } | |
| 149 | -} | ... | ... |
libqpdf/QPDF_json.cc
| ... | ... | @@ -693,7 +693,7 @@ QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value) |
| 693 | 693 | QPDFObject::JSON_Descr(j_descr.input, cur_object)); |
| 694 | 694 | } |
| 695 | 695 | |
| 696 | - oh.getObjectPtr()->setDescription(&pdf, descr, value.getStart()); | |
| 696 | + oh.obj_sp()->setDescription(&pdf, descr, value.getStart()); | |
| 697 | 697 | } |
| 698 | 698 | |
| 699 | 699 | QPDFObjectHandle | ... | ... |
libqpdf/QPDF_objects.cc
| ... | ... | @@ -1554,7 +1554,7 @@ Objects::readObjectAtOffset( |
| 1554 | 1554 | break; |
| 1555 | 1555 | } |
| 1556 | 1556 | } |
| 1557 | - m->objects.updateCache(og, oh.getObj(), end_before_space, m->file->tell()); | |
| 1557 | + m->objects.updateCache(og, oh.obj_sp(), end_before_space, m->file->tell()); | |
| 1558 | 1558 | } |
| 1559 | 1559 | |
| 1560 | 1560 | QPDFObjectHandle |
| ... | ... | @@ -1613,7 +1613,7 @@ Objects::readObjectAtOffset( |
| 1613 | 1613 | break; |
| 1614 | 1614 | } |
| 1615 | 1615 | } |
| 1616 | - m->objects.updateCache(og, oh.getObj(), end_before_space, m->file->tell()); | |
| 1616 | + m->objects.updateCache(og, oh.obj_sp(), end_before_space, m->file->tell()); | |
| 1617 | 1617 | |
| 1618 | 1618 | return oh; |
| 1619 | 1619 | } |
| ... | ... | @@ -1805,7 +1805,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) |
| 1805 | 1805 | entry->second.getObjStreamNumber() == obj_stream_number) { |
| 1806 | 1806 | is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset); |
| 1807 | 1807 | auto oh = readObjectInStream(in, obj_stream_number, obj_id); |
| 1808 | - updateCache(og, oh.getObj(), end_before_space, end_after_space); | |
| 1808 | + updateCache(og, oh.obj_sp(), end_before_space, end_after_space); | |
| 1809 | 1809 | } else { |
| 1810 | 1810 | QTC::TC("qpdf", "QPDF not caching overridden objstm object"); |
| 1811 | 1811 | } |
| ... | ... | @@ -1874,7 +1874,7 @@ QPDF::makeIndirectObject(QPDFObjectHandle oh) |
| 1874 | 1874 | if (!oh) { |
| 1875 | 1875 | throw std::logic_error("attempted to make an uninitialized QPDFObjectHandle indirect"); |
| 1876 | 1876 | } |
| 1877 | - return m->objects.makeIndirectFromQPDFObject(oh.getObj()); | |
| 1877 | + return m->objects.makeIndirectFromQPDFObject(oh.obj_sp()); | |
| 1878 | 1878 | } |
| 1879 | 1879 | |
| 1880 | 1880 | std::shared_ptr<QPDFObject> |
| ... | ... | @@ -1935,7 +1935,7 @@ QPDF::replaceObject(QPDFObjGen og, QPDFObjectHandle oh) |
| 1935 | 1935 | if (!oh || (oh.isIndirect() && !(oh.isStream() && oh.getObjGen() == og))) { |
| 1936 | 1936 | throw std::logic_error("QPDF::replaceObject called with indirect object handle"); |
| 1937 | 1937 | } |
| 1938 | - m->objects.updateCache(og, oh.getObj(), -1, -1, false); | |
| 1938 | + m->objects.updateCache(og, oh.obj_sp(), -1, -1, false); | |
| 1939 | 1939 | } |
| 1940 | 1940 | |
| 1941 | 1941 | void | ... | ... |
libqpdf/QUtil.cc
| ... | ... | @@ -1688,19 +1688,13 @@ QUtil::utf8_to_pdf_doc(std::string const& utf8, std::string& pdfdoc, char unknow |
| 1688 | 1688 | bool |
| 1689 | 1689 | QUtil::is_utf16(std::string const& val) |
| 1690 | 1690 | { |
| 1691 | - return ( | |
| 1692 | - (val.length() >= 2) && | |
| 1693 | - (((val.at(0) == '\xfe') && (val.at(1) == '\xff')) || | |
| 1694 | - ((val.at(0) == '\xff') && (val.at(1) == '\xfe')))); | |
| 1691 | + return util::is_utf16(val); | |
| 1695 | 1692 | } |
| 1696 | 1693 | |
| 1697 | 1694 | bool |
| 1698 | 1695 | QUtil::is_explicit_utf8(std::string const& val) |
| 1699 | 1696 | { |
| 1700 | - // QPDF_String.cc knows that this is a 3-byte sequence. | |
| 1701 | - return ( | |
| 1702 | - (val.length() >= 3) && (val.at(0) == '\xef') && (val.at(1) == '\xbb') && | |
| 1703 | - (val.at(2) == '\xbf')); | |
| 1697 | + return util::is_explicit_utf8(val); | |
| 1704 | 1698 | } |
| 1705 | 1699 | |
| 1706 | 1700 | std::string | ... | ... |
libqpdf/qpdf/QPDFObjectHandle_private.hh
| ... | ... | @@ -412,15 +412,15 @@ namespace qpdf |
| 412 | 412 | { |
| 413 | 413 | } |
| 414 | 414 | |
| 415 | - // Return the name value. If the object is not a valid Name, throw a | |
| 416 | - // std::invalid_argument exception. | |
| 415 | + // Return the name value. If the object is not a valid Name, throw a std::invalid_argument | |
| 416 | + // exception. | |
| 417 | 417 | operator std::string() const& |
| 418 | 418 | { |
| 419 | 419 | return value(); |
| 420 | 420 | } |
| 421 | 421 | |
| 422 | - // Return the integer value. If the object is not a valid integer, throw a | |
| 423 | - // std::invalid_argument exception. | |
| 422 | + // Return the name value. If the object is not a valid name, throw a std::invalid_argument | |
| 423 | + // exception. | |
| 424 | 424 | std::string const& value() const; |
| 425 | 425 | |
| 426 | 426 | // Return true if object value is equal to the 'rhs' value. Return false if the object is |
| ... | ... | @@ -589,7 +589,56 @@ namespace qpdf |
| 589 | 589 | void warn(std::string const& message); |
| 590 | 590 | |
| 591 | 591 | static std::map<std::string, std::string> filter_abbreviations; |
| 592 | - }; | |
| 592 | + }; // class Stream | |
| 593 | + | |
| 594 | + class String final: public BaseHandle | |
| 595 | + { | |
| 596 | + public: | |
| 597 | + String() = default; | |
| 598 | + String(String const&) = default; | |
| 599 | + String(String&&) = default; | |
| 600 | + String& operator=(String const&) = default; | |
| 601 | + String& operator=(String&&) = default; | |
| 602 | + ~String() = default; | |
| 603 | + | |
| 604 | + explicit String(std::string const&); | |
| 605 | + explicit String(std::string&&); | |
| 606 | + | |
| 607 | + String(QPDFObjectHandle const& oh) : | |
| 608 | + BaseHandle(oh.type_code() == ::ot_string ? oh : QPDFObjectHandle()) | |
| 609 | + { | |
| 610 | + } | |
| 611 | + | |
| 612 | + String(QPDFObjectHandle&& oh) : | |
| 613 | + BaseHandle(oh.type_code() == ::ot_string ? std::move(oh) : QPDFObjectHandle()) | |
| 614 | + { | |
| 615 | + } | |
| 616 | + | |
| 617 | + static String utf16(std::string const&); | |
| 618 | + | |
| 619 | + // Return the string value. If the object is not a valid string, throw a | |
| 620 | + // std::invalid_argument exception. | |
| 621 | + operator std::string() const& | |
| 622 | + { | |
| 623 | + return value(); | |
| 624 | + } | |
| 625 | + | |
| 626 | + // Return the string value. If the object is not a valid string, throw a | |
| 627 | + // std::invalid_argument exception. | |
| 628 | + std::string const& value() const; | |
| 629 | + | |
| 630 | + // Return the string value converted to UTF-8. If the object is not a valid string, throw a | |
| 631 | + // std::invalid_argument exception. | |
| 632 | + std::string utf8_value() const; | |
| 633 | + | |
| 634 | + // Return true if object value is equal to the 'rhs' value. Return false if the object is | |
| 635 | + // not a valid String. | |
| 636 | + friend bool | |
| 637 | + operator==(String const& lhs, std::string_view rhs) | |
| 638 | + { | |
| 639 | + return lhs && lhs.value() == rhs; | |
| 640 | + } | |
| 641 | + }; // class String | |
| 593 | 642 | |
| 594 | 643 | template <typename T> |
| 595 | 644 | T* |
| ... | ... | @@ -621,6 +670,18 @@ namespace qpdf |
| 621 | 670 | { |
| 622 | 671 | } |
| 623 | 672 | |
| 673 | + inline std::shared_ptr<QPDFObject> const& | |
| 674 | + BaseHandle::obj_sp() const | |
| 675 | + { | |
| 676 | + return obj; | |
| 677 | + } | |
| 678 | + | |
| 679 | + inline QPDFObjectHandle | |
| 680 | + BaseHandle::oh() const | |
| 681 | + { | |
| 682 | + return {obj}; | |
| 683 | + } | |
| 684 | + | |
| 624 | 685 | inline void |
| 625 | 686 | BaseHandle::assign(qpdf_object_type_e required, BaseHandle const& other) |
| 626 | 687 | { | ... | ... |
libqpdf/qpdf/QPDFObject_private.hh
| ... | ... | @@ -30,6 +30,7 @@ namespace qpdf |
| 30 | 30 | class Integer; |
| 31 | 31 | class Name; |
| 32 | 32 | class Stream; |
| 33 | + class String; | |
| 33 | 34 | |
| 34 | 35 | namespace impl |
| 35 | 36 | { |
| ... | ... | @@ -261,20 +262,24 @@ class QPDF_String final |
| 261 | 262 | { |
| 262 | 263 | friend class QPDFObject; |
| 263 | 264 | friend class qpdf::BaseHandle; |
| 265 | + friend class qpdf::String; | |
| 264 | 266 | friend class qpdf::impl::Writer; |
| 265 | 267 | |
| 266 | 268 | public: |
| 267 | - static std::shared_ptr<QPDFObject> create_utf16(std::string const& utf8_val); | |
| 268 | 269 | std::string unparse(bool force_binary = false); |
| 269 | 270 | void writeJSON(int json_version, JSON::Writer& p); |
| 270 | - std::string getUTF8Val() const; | |
| 271 | 271 | |
| 272 | 272 | private: |
| 273 | - QPDF_String(std::string val) : | |
| 273 | + QPDF_String(std::string const& val) : | |
| 274 | + val(val) | |
| 275 | + { | |
| 276 | + } | |
| 277 | + QPDF_String(std::string&& val) : | |
| 274 | 278 | val(std::move(val)) |
| 275 | 279 | { |
| 276 | 280 | } |
| 277 | 281 | bool useHexString() const; |
| 282 | + | |
| 278 | 283 | std::string val; |
| 279 | 284 | }; |
| 280 | 285 | ... | ... |
libqpdf/qpdf/Util.hh
| ... | ... | @@ -74,6 +74,19 @@ namespace qpdf::util |
| 74 | 74 | s.insert(0, 1, '1'); |
| 75 | 75 | } |
| 76 | 76 | |
| 77 | + inline bool | |
| 78 | + is_utf16(std::string const& str) | |
| 79 | + { | |
| 80 | + return str.starts_with("\xfe\xff") || str.starts_with("\xff\xfe"); | |
| 81 | + } | |
| 82 | + | |
| 83 | + inline bool | |
| 84 | + is_explicit_utf8(std::string const& str) | |
| 85 | + { | |
| 86 | + // QPDF_String.cc and QPDFObjectHandle.cc strip this marker with substr(3); it must stay 3 bytes. | |
| 87 | + return str.starts_with("\xef\xbb\xbf"); | |
| 88 | + } | |
| 89 | + | |
| 77 | 90 | std::string random_string(size_t len); |
| 78 | 91 | |
| 79 | 92 | } // namespace qpdf::util | ... | ... |
libtests/qutil.cc
| ... | ... | @@ -367,6 +367,16 @@ check_analyze(std::string const& str, bool has8bit, bool utf8, bool utf16) |
| 367 | 367 | } |
| 368 | 368 | |
| 369 | 369 | void |
| 370 | +explicit_utf8_test() | |
| 371 | +{ | |
| 372 | + assert(QUtil::is_explicit_utf8("\xef\xbb\xbfnot empty")); | |
| 373 | + assert(QUtil::is_explicit_utf8("\xef\xbb\xbf")); | |
| 374 | + assert(!QUtil::is_explicit_utf8("\xef\xbb\xbenot explicit")); | |
| 375 | + assert(!QUtil::is_explicit_utf8("\xef\xbe\xbfnot explicit")); | |
| 376 | + assert(!QUtil::is_explicit_utf8("\xee\xbb\xbfnot explicit")); | |
| 377 | +} | |
| 378 | + | |
| 379 | +void | |
| 370 | 380 | print_alternatives(std::string const& str) |
| 371 | 381 | { |
| 372 | 382 | std::vector<std::string> result = QUtil::possible_repaired_encodings(str); |
| ... | ... | @@ -432,7 +442,7 @@ transcoding_test() |
| 432 | 442 | std::string other_to_utf8; |
| 433 | 443 | assert(!QUtil::utf8_to_pdf_doc(other_utf8, other_to_utf8)); |
| 434 | 444 | std::cout << other_to_utf8 << '\n'; |
| 435 | - std::cout << "done other characters" << '\n'; | |
| 445 | + std::cout << "done other characters\n"; | |
| 436 | 446 | // These valid UTF8 strings when converted to PDFDoc would end up |
| 437 | 447 | // with a byte sequence that would be recognized as UTF-8 or |
| 438 | 448 | // UTF-16 rather than PDFDoc. A special case is required to store |
| ... | ... | @@ -747,6 +757,7 @@ main(int argc, char* argv[]) |
| 747 | 757 | getenv_test(); |
| 748 | 758 | std::cout << "---- utf8" << '\n'; |
| 749 | 759 | to_utf8_test(); |
| 760 | + explicit_utf8_test(); | |
| 750 | 761 | std::cout << "---- utf16" << '\n'; |
| 751 | 762 | to_utf16_test(); |
| 752 | 763 | std::cout << "---- utf8_to_ascii" << '\n'; | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -174,8 +174,6 @@ QPDFParser eof in parse 0 |
| 174 | 174 | QPDFParser eof in parseRemainder 0 |
| 175 | 175 | QPDFObjectHandle boolean returning false 0 |
| 176 | 176 | QPDFObjectHandle real returning 0.0 0 |
| 177 | -QPDFObjectHandle string returning empty string 0 | |
| 178 | -QPDFObjectHandle string returning empty utf8 0 | |
| 179 | 177 | QPDFObjectHandle operator returning fake value 0 |
| 180 | 178 | QPDFObjectHandle inlineimage returning empty data 0 |
| 181 | 179 | QPDFObjectHandle array treating as empty vector 0 | ... | ... |
qpdf/qtest/json.test
| ... | ... | @@ -38,7 +38,7 @@ my @json_files = ( |
| 38 | 38 | ['page-labels-and-outlines', |
| 39 | 39 | ['--json-key=objects', '--json-object=trailer', '--json-object=2 0 R']], |
| 40 | 40 | ['field-types', ['--json-key=acroform']], |
| 41 | - ['need-appearances', ['--json-key=acroform']], | |
| 41 | + ['need-appearances-utf8', ['--json-key=acroform']], | |
| 42 | 42 | ['V4-aes', ['--json-key=encrypt']], |
| 43 | 43 | ['V4-aes', ['--json-key=encrypt', '--show-encryption-key']], |
| 44 | 44 | ); | ... | ... |
qpdf/qtest/qpdf/json-need-appearances-acroform-v1.out renamed to qpdf/qtest/qpdf/json-need-appearances-utf8-acroform-v1.out
qpdf/qtest/qpdf/json-need-appearances-acroform-v2.out renamed to qpdf/qtest/qpdf/json-need-appearances-utf8-acroform-v2.out
qpdf/qtest/qpdf/need-appearances-utf8.pdf
0 → 100644
No preview for this file type