Commit 2d6512eb3e7fd508de452b5bfa75dd7e224142da
Committed by
GitHub
Merge pull request #1575 from m-holger/string
Add new private-API class String
Showing
18 changed files
with
4006 additions
and
115 deletions
include/qpdf/ObjectHandle.hh
| @@ -105,6 +105,9 @@ namespace qpdf | @@ -105,6 +105,9 @@ namespace qpdf | ||
| 105 | void warn(QPDFExc&&) const; | 105 | void warn(QPDFExc&&) const; |
| 106 | void warn(std::string const& warning) const; | 106 | void warn(std::string const& warning) const; |
| 107 | 107 | ||
| 108 | + inline std::shared_ptr<QPDFObject> const& obj_sp() const; | ||
| 109 | + inline QPDFObjectHandle oh() const; | ||
| 110 | + | ||
| 108 | protected: | 111 | protected: |
| 109 | BaseHandle() = default; | 112 | BaseHandle() = default; |
| 110 | BaseHandle(std::shared_ptr<QPDFObject> const& obj) : | 113 | BaseHandle(std::shared_ptr<QPDFObject> const& obj) : |
include/qpdf/QPDFObjectHandle.hh
| @@ -1321,21 +1321,6 @@ class QPDFObjectHandle: public qpdf::BaseHandle | @@ -1321,21 +1321,6 @@ class QPDFObjectHandle: public qpdf::BaseHandle | ||
| 1321 | { | 1321 | { |
| 1322 | return obj; | 1322 | return obj; |
| 1323 | } | 1323 | } |
| 1324 | - std::shared_ptr<QPDFObject> | ||
| 1325 | - getObj() const | ||
| 1326 | - { | ||
| 1327 | - return obj; | ||
| 1328 | - } | ||
| 1329 | - QPDFObject* | ||
| 1330 | - getObjectPtr() | ||
| 1331 | - { | ||
| 1332 | - return obj.get(); | ||
| 1333 | - } | ||
| 1334 | - QPDFObject* const | ||
| 1335 | - getObjectPtr() const | ||
| 1336 | - { | ||
| 1337 | - return obj.get(); | ||
| 1338 | - } | ||
| 1339 | 1324 | ||
| 1340 | void writeJSON(int json_version, JSON::Writer& p, bool dereference_indirect = false) const; | 1325 | void writeJSON(int json_version, JSON::Writer& p, bool dereference_indirect = false) const; |
| 1341 | 1326 |
libqpdf/QPDFFormFieldObjectHelper.cc
| @@ -765,7 +765,7 @@ QPDFFormFieldObjectHelper::generateTextAppearance(QPDFAnnotationObjectHelper& ao | @@ -765,7 +765,7 @@ QPDFFormFieldObjectHelper::generateTextAppearance(QPDFAnnotationObjectHelper& ao | ||
| 765 | return; | 765 | return; |
| 766 | } | 766 | } |
| 767 | 767 | ||
| 768 | - if (AS.getObj().use_count() > 4) { | 768 | + if (AS.obj_sp().use_count() > 3) { |
| 769 | aoh.warn("unable to generate text appearance from shared appearance stream for update"); | 769 | aoh.warn("unable to generate text appearance from shared appearance stream for update"); |
| 770 | return; | 770 | return; |
| 771 | } | 771 | } |
libqpdf/QPDFObjectHandle.cc
| @@ -318,7 +318,7 @@ BaseHandle::copy(bool shallow) const | @@ -318,7 +318,7 @@ BaseHandle::copy(bool shallow) const | ||
| 318 | throw std::logic_error("attempted to shallow copy QPDFObjectHandle from destroyed QPDF"); | 318 | throw std::logic_error("attempted to shallow copy QPDFObjectHandle from destroyed QPDF"); |
| 319 | return {}; // does not return | 319 | return {}; // does not return |
| 320 | case ::ot_reference: | 320 | case ::ot_reference: |
| 321 | - return obj->qpdf->getObject(obj->og).getObj(); | 321 | + return obj->qpdf->getObject(obj->og).obj_sp(); |
| 322 | } | 322 | } |
| 323 | return {}; // unreachable | 323 | return {}; // unreachable |
| 324 | } | 324 | } |
| @@ -470,7 +470,7 @@ BaseHandle::write_json(int json_version, JSON::Writer& p) const | @@ -470,7 +470,7 @@ BaseHandle::write_json(int json_version, JSON::Writer& p) const | ||
| 470 | p.writeNext() << "null"; | 470 | p.writeNext() << "null"; |
| 471 | } | 471 | } |
| 472 | p.writeNext(); | 472 | p.writeNext(); |
| 473 | - auto item_og = value.getObj()->getObjGen(); | 473 | + auto item_og = value.id_gen(); |
| 474 | if (item_og.isIndirect()) { | 474 | if (item_og.isIndirect()) { |
| 475 | p << "\"" << item_og.unparse(' ') << " R\""; | 475 | p << "\"" << item_og.unparse(' ') << " R\""; |
| 476 | } else { | 476 | } else { |
| @@ -999,50 +999,110 @@ QPDFObjectHandle::getValueAsName(std::string& value) const | @@ -999,50 +999,110 @@ QPDFObjectHandle::getValueAsName(std::string& value) const | ||
| 999 | return true; | 999 | return true; |
| 1000 | } | 1000 | } |
| 1001 | 1001 | ||
| 1002 | -// String accessors | 1002 | +// String methods |
| 1003 | + | ||
| 1004 | +QPDFObjectHandle | ||
| 1005 | +QPDFObjectHandle::newString(std::string const& str) | ||
| 1006 | +{ | ||
| 1007 | + return {QPDFObject::create<QPDF_String>(str)}; | ||
| 1008 | +} | ||
| 1009 | + | ||
| 1010 | +QPDFObjectHandle | ||
| 1011 | +QPDFObjectHandle::newUnicodeString(std::string const& utf8_str) | ||
| 1012 | +{ | ||
| 1013 | + return {String::utf16(utf8_str).obj_sp()}; | ||
| 1014 | +} | ||
| 1015 | + | ||
| 1016 | +String::String(std::string const& str) : | ||
| 1017 | + BaseHandle(QPDFObject::create<QPDF_String>(str)) | ||
| 1018 | +{ | ||
| 1019 | +} | ||
| 1020 | + | ||
| 1021 | +String::String(std::string&& str) : | ||
| 1022 | + BaseHandle(QPDFObject::create<QPDF_String>(std::move(str))) | ||
| 1023 | +{ | ||
| 1024 | +} | ||
| 1025 | + | ||
| 1026 | +String | ||
| 1027 | +String::utf16(std::string const& utf8_str) | ||
| 1028 | +{ | ||
| 1029 | + std::string result; | ||
| 1030 | + if (QUtil::utf8_to_pdf_doc(utf8_str, result, '?')) { | ||
| 1031 | + return String(result); | ||
| 1032 | + } | ||
| 1033 | + return String(QUtil::utf8_to_utf16(utf8_str)); | ||
| 1034 | +} | ||
| 1035 | + | ||
| 1036 | +std::string const& | ||
| 1037 | +String::value() const | ||
| 1038 | +{ | ||
| 1039 | + auto* s = as<QPDF_String>(); | ||
| 1040 | + if (!s) { | ||
| 1041 | + throw invalid_error("String"); | ||
| 1042 | + } | ||
| 1043 | + return s->val; | ||
| 1044 | +} | ||
| 1045 | + | ||
| 1046 | +std::string | ||
| 1047 | +String::utf8_value() const | ||
| 1048 | +{ | ||
| 1049 | + auto* s = as<QPDF_String>(); | ||
| 1050 | + if (!s) { | ||
| 1051 | + throw invalid_error("String"); | ||
| 1052 | + } | ||
| 1053 | + if (util::is_utf16(s->val)) { | ||
| 1054 | + return QUtil::utf16_to_utf8(s->val); | ||
| 1055 | + } | ||
| 1056 | + if (util::is_explicit_utf8(s->val)) { | ||
| 1057 | + // PDF 2.0 allows UTF-8 strings when explicitly prefixed with the three-byte representation | ||
| 1058 | + // of U+FEFF. | ||
| 1059 | + return s->val.substr(3); | ||
| 1060 | + } | ||
| 1061 | + return QUtil::pdf_doc_to_utf8(s->val); | ||
| 1062 | +} | ||
| 1003 | 1063 | ||
| 1004 | std::string | 1064 | std::string |
| 1005 | QPDFObjectHandle::getStringValue() const | 1065 | QPDFObjectHandle::getStringValue() const |
| 1006 | { | 1066 | { |
| 1007 | - if (isString()) { | ||
| 1008 | - return obj->getStringValue(); | ||
| 1009 | - } else { | 1067 | + try { |
| 1068 | + return String(obj).value(); | ||
| 1069 | + } catch (std::invalid_argument&) { | ||
| 1010 | typeWarning("string", "returning empty string"); | 1070 | typeWarning("string", "returning empty string"); |
| 1011 | - QTC::TC("qpdf", "QPDFObjectHandle string returning empty string"); | ||
| 1012 | - return ""; | 1071 | + return {}; |
| 1013 | } | 1072 | } |
| 1014 | } | 1073 | } |
| 1015 | 1074 | ||
| 1016 | bool | 1075 | bool |
| 1017 | QPDFObjectHandle::getValueAsString(std::string& value) const | 1076 | QPDFObjectHandle::getValueAsString(std::string& value) const |
| 1018 | { | 1077 | { |
| 1019 | - if (!isString()) { | 1078 | + try { |
| 1079 | + value = String(obj).value(); | ||
| 1080 | + return true; | ||
| 1081 | + } catch (std::invalid_argument&) { | ||
| 1020 | return false; | 1082 | return false; |
| 1021 | } | 1083 | } |
| 1022 | - value = obj->getStringValue(); | ||
| 1023 | - return true; | ||
| 1024 | } | 1084 | } |
| 1025 | 1085 | ||
| 1026 | std::string | 1086 | std::string |
| 1027 | QPDFObjectHandle::getUTF8Value() const | 1087 | QPDFObjectHandle::getUTF8Value() const |
| 1028 | { | 1088 | { |
| 1029 | - if (auto str = as<QPDF_String>()) { | ||
| 1030 | - return str->getUTF8Val(); | ||
| 1031 | - } else { | 1089 | + try { |
| 1090 | + return String(obj).utf8_value(); | ||
| 1091 | + } catch (std::invalid_argument&) { | ||
| 1032 | typeWarning("string", "returning empty string"); | 1092 | typeWarning("string", "returning empty string"); |
| 1033 | - QTC::TC("qpdf", "QPDFObjectHandle string returning empty utf8"); | ||
| 1034 | - return ""; | 1093 | + return {}; |
| 1035 | } | 1094 | } |
| 1036 | } | 1095 | } |
| 1037 | 1096 | ||
| 1038 | bool | 1097 | bool |
| 1039 | QPDFObjectHandle::getValueAsUTF8(std::string& value) const | 1098 | QPDFObjectHandle::getValueAsUTF8(std::string& value) const |
| 1040 | { | 1099 | { |
| 1041 | - if (auto str = as<QPDF_String>()) { | ||
| 1042 | - value = str->getUTF8Val(); | 1100 | + try { |
| 1101 | + value = String(obj).utf8_value(); | ||
| 1043 | return true; | 1102 | return true; |
| 1103 | + } catch (std::invalid_argument&) { | ||
| 1104 | + return false; | ||
| 1044 | } | 1105 | } |
| 1045 | - return false; | ||
| 1046 | } | 1106 | } |
| 1047 | 1107 | ||
| 1048 | // Operator and Inline Image accessors | 1108 | // Operator and Inline Image accessors |
| @@ -1718,18 +1778,6 @@ QPDFObjectHandle::newReal(double value, int decimal_places, bool trim_trailing_z | @@ -1718,18 +1778,6 @@ QPDFObjectHandle::newReal(double value, int decimal_places, bool trim_trailing_z | ||
| 1718 | } | 1778 | } |
| 1719 | 1779 | ||
| 1720 | QPDFObjectHandle | 1780 | QPDFObjectHandle |
| 1721 | -QPDFObjectHandle::newString(std::string const& str) | ||
| 1722 | -{ | ||
| 1723 | - return {QPDFObject::create<QPDF_String>(str)}; | ||
| 1724 | -} | ||
| 1725 | - | ||
| 1726 | -QPDFObjectHandle | ||
| 1727 | -QPDFObjectHandle::newUnicodeString(std::string const& utf8_str) | ||
| 1728 | -{ | ||
| 1729 | - return {QPDF_String::create_utf16(utf8_str)}; | ||
| 1730 | -} | ||
| 1731 | - | ||
| 1732 | -QPDFObjectHandle | ||
| 1733 | QPDFObjectHandle::newOperator(std::string const& value) | 1781 | QPDFObjectHandle::newOperator(std::string const& value) |
| 1734 | { | 1782 | { |
| 1735 | return {QPDFObject::create<QPDF_Operator>(value)}; | 1783 | return {QPDFObject::create<QPDF_Operator>(value)}; |
libqpdf/QPDFParser.cc
| @@ -626,8 +626,8 @@ QPDFParser::fixMissingKeys() | @@ -626,8 +626,8 @@ QPDFParser::fixMissingKeys() | ||
| 626 | { | 626 | { |
| 627 | std::set<std::string> names; | 627 | std::set<std::string> names; |
| 628 | for (auto& obj: frame->olist) { | 628 | for (auto& obj: frame->olist) { |
| 629 | - if (obj.getObj()->getTypeCode() == ::ot_name) { | ||
| 630 | - names.insert(obj.getObj()->getStringValue()); | 629 | + if (obj.raw_type_code() == ::ot_name) { |
| 630 | + names.insert(obj.obj_sp()->getStringValue()); | ||
| 631 | } | 631 | } |
| 632 | } | 632 | } |
| 633 | int next_fake_key = 1; | 633 | int next_fake_key = 1; |
libqpdf/QPDF_String.cc
| @@ -2,6 +2,7 @@ | @@ -2,6 +2,7 @@ | ||
| 2 | 2 | ||
| 3 | #include <qpdf/QPDFObjectHandle_private.hh> | 3 | #include <qpdf/QPDFObjectHandle_private.hh> |
| 4 | #include <qpdf/QUtil.hh> | 4 | #include <qpdf/QUtil.hh> |
| 5 | +#include <qpdf/Util.hh> | ||
| 5 | 6 | ||
| 6 | // DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of | 7 | // DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of |
| 7 | // including it in case it may accidentally be used. | 8 | // including it in case it may accidentally be used. |
| @@ -9,40 +10,46 @@ | @@ -9,40 +10,46 @@ | ||
| 9 | static bool | 10 | static bool |
| 10 | is_iso_latin1_printable(char ch) | 11 | is_iso_latin1_printable(char ch) |
| 11 | { | 12 | { |
| 12 | - return (((ch >= 32) && (ch <= 126)) || (static_cast<unsigned char>(ch) >= 160)); | ||
| 13 | -} | ||
| 14 | - | ||
| 15 | -std::shared_ptr<QPDFObject> | ||
| 16 | -QPDF_String::create_utf16(std::string const& utf8_val) | ||
| 17 | -{ | ||
| 18 | - std::string result; | ||
| 19 | - if (!QUtil::utf8_to_pdf_doc(utf8_val, result, '?')) { | ||
| 20 | - result = QUtil::utf8_to_utf16(utf8_val); | ||
| 21 | - } | ||
| 22 | - return QPDFObject::create<QPDF_String>(result); | 13 | + return (ch >= 32 && ch <= 126) || static_cast<unsigned char>(ch) >= 160; |
| 23 | } | 14 | } |
| 24 | 15 | ||
| 25 | void | 16 | void |
| 26 | QPDF_String::writeJSON(int json_version, JSON::Writer& p) | 17 | QPDF_String::writeJSON(int json_version, JSON::Writer& p) |
| 27 | { | 18 | { |
| 28 | - auto candidate = getUTF8Val(); | ||
| 29 | if (json_version == 1) { | 19 | if (json_version == 1) { |
| 30 | - p << "\"" << JSON::Writer::encode_string(candidate) << "\""; | ||
| 31 | - } else { | ||
| 32 | - // See if we can unambiguously represent as Unicode. | ||
| 33 | - if (QUtil::is_utf16(val) || QUtil::is_explicit_utf8(val)) { | 20 | + if (util::is_utf16(val)) { |
| 21 | + p << "\"" << JSON::Writer::encode_string(QUtil::utf16_to_utf8(val)) << "\""; | ||
| 22 | + return; | ||
| 23 | + } | ||
| 24 | + if (util::is_explicit_utf8(val)) { | ||
| 25 | + // PDF 2.0 allows UTF-8 strings when explicitly prefixed with the three-byte | ||
| 26 | + // representation of U+FEFF. | ||
| 27 | + p << "\"" << JSON::Writer::encode_string(val.substr(3)) << "\""; | ||
| 28 | + return; | ||
| 29 | + } | ||
| 30 | + p << "\"" << JSON::Writer::encode_string(QUtil::pdf_doc_to_utf8(val)) << "\""; | ||
| 31 | + return; | ||
| 32 | + } | ||
| 33 | + // See if we can unambiguously represent as Unicode. | ||
| 34 | + if (util::is_utf16(val)) { | ||
| 35 | + p << "\"u:" << JSON::Writer::encode_string(QUtil::utf16_to_utf8(val)) << "\""; | ||
| 36 | + return; | ||
| 37 | + } | ||
| 38 | + // Explicitly marked UTF-8 can also be represented unambiguously as Unicode. | ||
| 39 | + if (util::is_explicit_utf8(val)) { | ||
| 40 | + p << "\"u:" << JSON::Writer::encode_string(val.substr(3)) << "\""; | ||
| 41 | + return; | ||
| 42 | + } | ||
| 43 | + if (!useHexString()) { | ||
| 44 | + auto candidate = QUtil::pdf_doc_to_utf8(val); | ||
| 45 | + std::string test; | ||
| 46 | + if (QUtil::utf8_to_pdf_doc(candidate, test, '?') && test == val) { | ||
| 47 | + // This is a PDF-doc string that can be losslessly encoded as Unicode. | ||
| 34 | p << "\"u:" << JSON::Writer::encode_string(candidate) << "\""; | 48 | p << "\"u:" << JSON::Writer::encode_string(candidate) << "\""; |
| 35 | return; | 49 | return; |
| 36 | - } else if (!useHexString()) { | ||
| 37 | - std::string test; | ||
| 38 | - if (QUtil::utf8_to_pdf_doc(candidate, test, '?') && (test == val)) { | ||
| 39 | - // This is a PDF-doc string that can be losslessly encoded as Unicode. | ||
| 40 | - p << "\"u:" << JSON::Writer::encode_string(candidate) << "\""; | ||
| 41 | - return; | ||
| 42 | - } | ||
| 43 | } | 50 | } |
| 44 | - p << "\"b:" << QUtil::hex_encode(val) << "\""; | ||
| 45 | } | 51 | } |
| 52 | + p << "\"b:" << QUtil::hex_encode(val) << "\""; | ||
| 46 | } | 53 | } |
| 47 | 54 | ||
| 48 | bool | 55 | bool |
| @@ -133,17 +140,3 @@ QPDF_String::unparse(bool force_binary) | @@ -133,17 +140,3 @@ QPDF_String::unparse(bool force_binary) | ||
| 133 | 140 | ||
| 134 | return result; | 141 | return result; |
| 135 | } | 142 | } |
| 136 | - | ||
| 137 | -std::string | ||
| 138 | -QPDF_String::getUTF8Val() const | ||
| 139 | -{ | ||
| 140 | - if (QUtil::is_utf16(val)) { | ||
| 141 | - return QUtil::utf16_to_utf8(val); | ||
| 142 | - } else if (QUtil::is_explicit_utf8(val)) { | ||
| 143 | - // PDF 2.0 allows UTF-8 strings when explicitly prefixed with the three-byte representation | ||
| 144 | - // of U+FEFF. | ||
| 145 | - return val.substr(3); | ||
| 146 | - } else { | ||
| 147 | - return QUtil::pdf_doc_to_utf8(val); | ||
| 148 | - } | ||
| 149 | -} |
libqpdf/QPDF_json.cc
| @@ -693,7 +693,7 @@ QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value) | @@ -693,7 +693,7 @@ QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value) | ||
| 693 | QPDFObject::JSON_Descr(j_descr.input, cur_object)); | 693 | QPDFObject::JSON_Descr(j_descr.input, cur_object)); |
| 694 | } | 694 | } |
| 695 | 695 | ||
| 696 | - oh.getObjectPtr()->setDescription(&pdf, descr, value.getStart()); | 696 | + oh.obj_sp()->setDescription(&pdf, descr, value.getStart()); |
| 697 | } | 697 | } |
| 698 | 698 | ||
| 699 | QPDFObjectHandle | 699 | QPDFObjectHandle |
libqpdf/QPDF_objects.cc
| @@ -1554,7 +1554,7 @@ Objects::readObjectAtOffset( | @@ -1554,7 +1554,7 @@ Objects::readObjectAtOffset( | ||
| 1554 | break; | 1554 | break; |
| 1555 | } | 1555 | } |
| 1556 | } | 1556 | } |
| 1557 | - m->objects.updateCache(og, oh.getObj(), end_before_space, m->file->tell()); | 1557 | + m->objects.updateCache(og, oh.obj_sp(), end_before_space, m->file->tell()); |
| 1558 | } | 1558 | } |
| 1559 | 1559 | ||
| 1560 | QPDFObjectHandle | 1560 | QPDFObjectHandle |
| @@ -1613,7 +1613,7 @@ Objects::readObjectAtOffset( | @@ -1613,7 +1613,7 @@ Objects::readObjectAtOffset( | ||
| 1613 | break; | 1613 | break; |
| 1614 | } | 1614 | } |
| 1615 | } | 1615 | } |
| 1616 | - m->objects.updateCache(og, oh.getObj(), end_before_space, m->file->tell()); | 1616 | + m->objects.updateCache(og, oh.obj_sp(), end_before_space, m->file->tell()); |
| 1617 | 1617 | ||
| 1618 | return oh; | 1618 | return oh; |
| 1619 | } | 1619 | } |
| @@ -1805,7 +1805,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) | @@ -1805,7 +1805,7 @@ Objects::resolveObjectsInStream(int obj_stream_number) | ||
| 1805 | entry->second.getObjStreamNumber() == obj_stream_number) { | 1805 | entry->second.getObjStreamNumber() == obj_stream_number) { |
| 1806 | is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset); | 1806 | is::OffsetBuffer in("", {b_start + obj_offset, obj_size}, obj_offset); |
| 1807 | auto oh = readObjectInStream(in, obj_stream_number, obj_id); | 1807 | auto oh = readObjectInStream(in, obj_stream_number, obj_id); |
| 1808 | - updateCache(og, oh.getObj(), end_before_space, end_after_space); | 1808 | + updateCache(og, oh.obj_sp(), end_before_space, end_after_space); |
| 1809 | } else { | 1809 | } else { |
| 1810 | QTC::TC("qpdf", "QPDF not caching overridden objstm object"); | 1810 | QTC::TC("qpdf", "QPDF not caching overridden objstm object"); |
| 1811 | } | 1811 | } |
| @@ -1874,7 +1874,7 @@ QPDF::makeIndirectObject(QPDFObjectHandle oh) | @@ -1874,7 +1874,7 @@ QPDF::makeIndirectObject(QPDFObjectHandle oh) | ||
| 1874 | if (!oh) { | 1874 | if (!oh) { |
| 1875 | throw std::logic_error("attempted to make an uninitialized QPDFObjectHandle indirect"); | 1875 | throw std::logic_error("attempted to make an uninitialized QPDFObjectHandle indirect"); |
| 1876 | } | 1876 | } |
| 1877 | - return m->objects.makeIndirectFromQPDFObject(oh.getObj()); | 1877 | + return m->objects.makeIndirectFromQPDFObject(oh.obj_sp()); |
| 1878 | } | 1878 | } |
| 1879 | 1879 | ||
| 1880 | std::shared_ptr<QPDFObject> | 1880 | std::shared_ptr<QPDFObject> |
| @@ -1935,7 +1935,7 @@ QPDF::replaceObject(QPDFObjGen og, QPDFObjectHandle oh) | @@ -1935,7 +1935,7 @@ QPDF::replaceObject(QPDFObjGen og, QPDFObjectHandle oh) | ||
| 1935 | if (!oh || (oh.isIndirect() && !(oh.isStream() && oh.getObjGen() == og))) { | 1935 | if (!oh || (oh.isIndirect() && !(oh.isStream() && oh.getObjGen() == og))) { |
| 1936 | throw std::logic_error("QPDF::replaceObject called with indirect object handle"); | 1936 | throw std::logic_error("QPDF::replaceObject called with indirect object handle"); |
| 1937 | } | 1937 | } |
| 1938 | - m->objects.updateCache(og, oh.getObj(), -1, -1, false); | 1938 | + m->objects.updateCache(og, oh.obj_sp(), -1, -1, false); |
| 1939 | } | 1939 | } |
| 1940 | 1940 | ||
| 1941 | void | 1941 | void |
libqpdf/QUtil.cc
| @@ -1688,19 +1688,13 @@ QUtil::utf8_to_pdf_doc(std::string const& utf8, std::string& pdfdoc, char unknow | @@ -1688,19 +1688,13 @@ QUtil::utf8_to_pdf_doc(std::string const& utf8, std::string& pdfdoc, char unknow | ||
| 1688 | bool | 1688 | bool |
| 1689 | QUtil::is_utf16(std::string const& val) | 1689 | QUtil::is_utf16(std::string const& val) |
| 1690 | { | 1690 | { |
| 1691 | - return ( | ||
| 1692 | - (val.length() >= 2) && | ||
| 1693 | - (((val.at(0) == '\xfe') && (val.at(1) == '\xff')) || | ||
| 1694 | - ((val.at(0) == '\xff') && (val.at(1) == '\xfe')))); | 1691 | + return util::is_utf16(val); |
| 1695 | } | 1692 | } |
| 1696 | 1693 | ||
| 1697 | bool | 1694 | bool |
| 1698 | QUtil::is_explicit_utf8(std::string const& val) | 1695 | QUtil::is_explicit_utf8(std::string const& val) |
| 1699 | { | 1696 | { |
| 1700 | - // QPDF_String.cc knows that this is a 3-byte sequence. | ||
| 1701 | - return ( | ||
| 1702 | - (val.length() >= 3) && (val.at(0) == '\xef') && (val.at(1) == '\xbb') && | ||
| 1703 | - (val.at(2) == '\xbf')); | 1697 | + return util::is_explicit_utf8(val); |
| 1704 | } | 1698 | } |
| 1705 | 1699 | ||
| 1706 | std::string | 1700 | std::string |
libqpdf/qpdf/QPDFObjectHandle_private.hh
| @@ -412,15 +412,15 @@ namespace qpdf | @@ -412,15 +412,15 @@ namespace qpdf | ||
| 412 | { | 412 | { |
| 413 | } | 413 | } |
| 414 | 414 | ||
| 415 | - // Return the name value. If the object is not a valid Name, throw a | ||
| 416 | - // std::invalid_argument exception. | 415 | + // Return the name value. If the object is not a valid Name, throw a std::invalid_argument |
| 416 | + // exception. | ||
| 417 | operator std::string() const& | 417 | operator std::string() const& |
| 418 | { | 418 | { |
| 419 | return value(); | 419 | return value(); |
| 420 | } | 420 | } |
| 421 | 421 | ||
| 422 | - // Return the integer value. If the object is not a valid integer, throw a | ||
| 423 | - // std::invalid_argument exception. | 422 | + // Return the name value. If the object is not a valid name, throw a std::invalid_argument |
| 423 | + // exception. | ||
| 424 | std::string const& value() const; | 424 | std::string const& value() const; |
| 425 | 425 | ||
| 426 | // Return true if object value is equal to the 'rhs' value. Return false if the object is | 426 | // Return true if object value is equal to the 'rhs' value. Return false if the object is |
| @@ -589,7 +589,56 @@ namespace qpdf | @@ -589,7 +589,56 @@ namespace qpdf | ||
| 589 | void warn(std::string const& message); | 589 | void warn(std::string const& message); |
| 590 | 590 | ||
| 591 | static std::map<std::string, std::string> filter_abbreviations; | 591 | static std::map<std::string, std::string> filter_abbreviations; |
| 592 | - }; | 592 | + }; // class Stream |
| 593 | + | ||
| 594 | + class String final: public BaseHandle | ||
| 595 | + { | ||
| 596 | + public: | ||
| 597 | + String() = default; | ||
| 598 | + String(String const&) = default; | ||
| 599 | + String(String&&) = default; | ||
| 600 | + String& operator=(String const&) = default; | ||
| 601 | + String& operator=(String&&) = default; | ||
| 602 | + ~String() = default; | ||
| 603 | + | ||
| 604 | + explicit String(std::string const&); | ||
| 605 | + explicit String(std::string&&); | ||
| 606 | + | ||
| 607 | + String(QPDFObjectHandle const& oh) : | ||
| 608 | + BaseHandle(oh.type_code() == ::ot_string ? oh : QPDFObjectHandle()) | ||
| 609 | + { | ||
| 610 | + } | ||
| 611 | + | ||
| 612 | + String(QPDFObjectHandle&& oh) : | ||
| 613 | + BaseHandle(oh.type_code() == ::ot_string ? std::move(oh) : QPDFObjectHandle()) | ||
| 614 | + { | ||
| 615 | + } | ||
| 616 | + | ||
| 617 | + static String utf16(std::string const&); | ||
| 618 | + | ||
| 619 | + // Return the string value. If the object is not a valid string, throw a | ||
| 620 | + // std::invalid_argument exception. | ||
| 621 | + operator std::string() const& | ||
| 622 | + { | ||
| 623 | + return value(); | ||
| 624 | + } | ||
| 625 | + | ||
| 626 | + // Return the string value. If the object is not a valid string, throw a | ||
| 627 | + // std::invalid_argument exception. | ||
| 628 | + std::string const& value() const; | ||
| 629 | + | ||
| 630 | + // Return the string value converted to UTF-8. If the object is not a valid string, throw | ||
| 631 | + // a std::invalid_argument exception. | ||
| 632 | + std::string utf8_value() const; | ||
| 633 | + | ||
| 634 | + // Return true if object value is equal to the 'rhs' value. Return false if the object is | ||
| 635 | + // not a valid String. | ||
| 636 | + friend bool | ||
| 637 | + operator==(String const& lhs, std::string_view rhs) | ||
| 638 | + { | ||
| 639 | + return lhs && lhs.value() == rhs; | ||
| 640 | + } | ||
| 641 | + }; // class String | ||
| 593 | 642 | ||
| 594 | template <typename T> | 643 | template <typename T> |
| 595 | T* | 644 | T* |
| @@ -621,6 +670,18 @@ namespace qpdf | @@ -621,6 +670,18 @@ namespace qpdf | ||
| 621 | { | 670 | { |
| 622 | } | 671 | } |
| 623 | 672 | ||
| 673 | + inline std::shared_ptr<QPDFObject> const& | ||
| 674 | + BaseHandle::obj_sp() const | ||
| 675 | + { | ||
| 676 | + return obj; | ||
| 677 | + } | ||
| 678 | + | ||
| 679 | + inline QPDFObjectHandle | ||
| 680 | + BaseHandle::oh() const | ||
| 681 | + { | ||
| 682 | + return {obj}; | ||
| 683 | + } | ||
| 684 | + | ||
| 624 | inline void | 685 | inline void |
| 625 | BaseHandle::assign(qpdf_object_type_e required, BaseHandle const& other) | 686 | BaseHandle::assign(qpdf_object_type_e required, BaseHandle const& other) |
| 626 | { | 687 | { |
libqpdf/qpdf/QPDFObject_private.hh
| @@ -30,6 +30,7 @@ namespace qpdf | @@ -30,6 +30,7 @@ namespace qpdf | ||
| 30 | class Integer; | 30 | class Integer; |
| 31 | class Name; | 31 | class Name; |
| 32 | class Stream; | 32 | class Stream; |
| 33 | + class String; | ||
| 33 | 34 | ||
| 34 | namespace impl | 35 | namespace impl |
| 35 | { | 36 | { |
| @@ -261,20 +262,24 @@ class QPDF_String final | @@ -261,20 +262,24 @@ class QPDF_String final | ||
| 261 | { | 262 | { |
| 262 | friend class QPDFObject; | 263 | friend class QPDFObject; |
| 263 | friend class qpdf::BaseHandle; | 264 | friend class qpdf::BaseHandle; |
| 265 | + friend class qpdf::String; | ||
| 264 | friend class qpdf::impl::Writer; | 266 | friend class qpdf::impl::Writer; |
| 265 | 267 | ||
| 266 | public: | 268 | public: |
| 267 | - static std::shared_ptr<QPDFObject> create_utf16(std::string const& utf8_val); | ||
| 268 | std::string unparse(bool force_binary = false); | 269 | std::string unparse(bool force_binary = false); |
| 269 | void writeJSON(int json_version, JSON::Writer& p); | 270 | void writeJSON(int json_version, JSON::Writer& p); |
| 270 | - std::string getUTF8Val() const; | ||
| 271 | 271 | ||
| 272 | private: | 272 | private: |
| 273 | - QPDF_String(std::string val) : | 273 | + QPDF_String(std::string const& val) : |
| 274 | + val(val) | ||
| 275 | + { | ||
| 276 | + } | ||
| 277 | + QPDF_String(std::string&& val) : | ||
| 274 | val(std::move(val)) | 278 | val(std::move(val)) |
| 275 | { | 279 | { |
| 276 | } | 280 | } |
| 277 | bool useHexString() const; | 281 | bool useHexString() const; |
| 282 | + | ||
| 278 | std::string val; | 283 | std::string val; |
| 279 | }; | 284 | }; |
| 280 | 285 |
libqpdf/qpdf/Util.hh
| @@ -74,6 +74,19 @@ namespace qpdf::util | @@ -74,6 +74,19 @@ namespace qpdf::util | ||
| 74 | s.insert(0, 1, '1'); | 74 | s.insert(0, 1, '1'); |
| 75 | } | 75 | } |
| 76 | 76 | ||
| 77 | + inline bool | ||
| 78 | + is_utf16(std::string const& str) | ||
| 79 | + { | ||
| 80 | + return str.starts_with("\xfe\xff") || str.starts_with("\xff\xfe"); | ||
| 81 | + } | ||
| 82 | + | ||
| 83 | + inline bool | ||
| 84 | + is_explicit_utf8(std::string const& str) | ||
| 85 | + { | ||
| 86 | + // QPDF_String.cc and QPDFObjectHandle.cc rely on this being a 3-byte sequence. | ||
| 87 | + return str.starts_with("\xef\xbb\xbf"); | ||
| 88 | + } | ||
| 89 | + | ||
| 77 | std::string random_string(size_t len); | 90 | std::string random_string(size_t len); |
| 78 | 91 | ||
| 79 | } // namespace qpdf::util | 92 | } // namespace qpdf::util |
libtests/qutil.cc
| @@ -367,6 +367,16 @@ check_analyze(std::string const& str, bool has8bit, bool utf8, bool utf16) | @@ -367,6 +367,16 @@ check_analyze(std::string const& str, bool has8bit, bool utf8, bool utf16) | ||
| 367 | } | 367 | } |
| 368 | 368 | ||
| 369 | void | 369 | void |
| 370 | +explicit_utf8_test() | ||
| 371 | +{ | ||
| 372 | + assert(QUtil::is_explicit_utf8("\xef\xbb\xbfnot empty")); | ||
| 373 | + assert(QUtil::is_explicit_utf8("\xef\xbb\xbf")); | ||
| 374 | + assert(!QUtil::is_explicit_utf8("\xef\xbb\xbenot explicit")); | ||
| 375 | + assert(!QUtil::is_explicit_utf8("\xef\xbe\xbfnot explicit")); | ||
| 376 | + assert(!QUtil::is_explicit_utf8("\xee\xbb\xbfnot explicit")); | ||
| 377 | +} | ||
| 378 | + | ||
| 379 | +void | ||
| 370 | print_alternatives(std::string const& str) | 380 | print_alternatives(std::string const& str) |
| 371 | { | 381 | { |
| 372 | std::vector<std::string> result = QUtil::possible_repaired_encodings(str); | 382 | std::vector<std::string> result = QUtil::possible_repaired_encodings(str); |
| @@ -432,7 +442,7 @@ transcoding_test() | @@ -432,7 +442,7 @@ transcoding_test() | ||
| 432 | std::string other_to_utf8; | 442 | std::string other_to_utf8; |
| 433 | assert(!QUtil::utf8_to_pdf_doc(other_utf8, other_to_utf8)); | 443 | assert(!QUtil::utf8_to_pdf_doc(other_utf8, other_to_utf8)); |
| 434 | std::cout << other_to_utf8 << '\n'; | 444 | std::cout << other_to_utf8 << '\n'; |
| 435 | - std::cout << "done other characters" << '\n'; | 445 | + std::cout << "done other characters\n"; |
| 436 | // These valid UTF8 strings when converted to PDFDoc would end up | 446 | // These valid UTF8 strings when converted to PDFDoc would end up |
| 437 | // with a byte sequence that would be recognized as UTF-8 or | 447 | // with a byte sequence that would be recognized as UTF-8 or |
| 438 | // UTF-16 rather than PDFDoc. A special case is required to store | 448 | // UTF-16 rather than PDFDoc. A special case is required to store |
| @@ -747,6 +757,7 @@ main(int argc, char* argv[]) | @@ -747,6 +757,7 @@ main(int argc, char* argv[]) | ||
| 747 | getenv_test(); | 757 | getenv_test(); |
| 748 | std::cout << "---- utf8" << '\n'; | 758 | std::cout << "---- utf8" << '\n'; |
| 749 | to_utf8_test(); | 759 | to_utf8_test(); |
| 760 | + explicit_utf8_test(); | ||
| 750 | std::cout << "---- utf16" << '\n'; | 761 | std::cout << "---- utf16" << '\n'; |
| 751 | to_utf16_test(); | 762 | to_utf16_test(); |
| 752 | std::cout << "---- utf8_to_ascii" << '\n'; | 763 | std::cout << "---- utf8_to_ascii" << '\n'; |
qpdf/qpdf.testcov
| @@ -174,8 +174,6 @@ QPDFParser eof in parse 0 | @@ -174,8 +174,6 @@ QPDFParser eof in parse 0 | ||
| 174 | QPDFParser eof in parseRemainder 0 | 174 | QPDFParser eof in parseRemainder 0 |
| 175 | QPDFObjectHandle boolean returning false 0 | 175 | QPDFObjectHandle boolean returning false 0 |
| 176 | QPDFObjectHandle real returning 0.0 0 | 176 | QPDFObjectHandle real returning 0.0 0 |
| 177 | -QPDFObjectHandle string returning empty string 0 | ||
| 178 | -QPDFObjectHandle string returning empty utf8 0 | ||
| 179 | QPDFObjectHandle operator returning fake value 0 | 177 | QPDFObjectHandle operator returning fake value 0 |
| 180 | QPDFObjectHandle inlineimage returning empty data 0 | 178 | QPDFObjectHandle inlineimage returning empty data 0 |
| 181 | QPDFObjectHandle array treating as empty vector 0 | 179 | QPDFObjectHandle array treating as empty vector 0 |
qpdf/qtest/json.test
| @@ -38,7 +38,7 @@ my @json_files = ( | @@ -38,7 +38,7 @@ my @json_files = ( | ||
| 38 | ['page-labels-and-outlines', | 38 | ['page-labels-and-outlines', |
| 39 | ['--json-key=objects', '--json-object=trailer', '--json-object=2 0 R']], | 39 | ['--json-key=objects', '--json-object=trailer', '--json-object=2 0 R']], |
| 40 | ['field-types', ['--json-key=acroform']], | 40 | ['field-types', ['--json-key=acroform']], |
| 41 | - ['need-appearances', ['--json-key=acroform']], | 41 | + ['need-appearances-utf8', ['--json-key=acroform']], |
| 42 | ['V4-aes', ['--json-key=encrypt']], | 42 | ['V4-aes', ['--json-key=encrypt']], |
| 43 | ['V4-aes', ['--json-key=encrypt', '--show-encryption-key']], | 43 | ['V4-aes', ['--json-key=encrypt', '--show-encryption-key']], |
| 44 | ); | 44 | ); |
qpdf/qtest/qpdf/json-need-appearances-acroform-v1.out renamed to qpdf/qtest/qpdf/json-need-appearances-utf8-acroform-v1.out
qpdf/qtest/qpdf/json-need-appearances-acroform-v2.out renamed to qpdf/qtest/qpdf/json-need-appearances-utf8-acroform-v2.out
qpdf/qtest/qpdf/need-appearances-utf8.pdf
0 → 100644
No preview for this file type