Commit c56a9ca7f6484925627aa1da374a236949c07cb2

Authored by Jay Berkenbilt
1 parent 47c093c4

JSON: Fix large file support

include/qpdf/JSON.hh
... ... @@ -37,6 +37,7 @@
37 37  
38 38 #include <qpdf/DLL.h>
39 39 #include <qpdf/PointerHolder.hh>
  40 +#include <qpdf/Types.h>
40 41  
41 42 #include <functional>
42 43 #include <list>
... ... @@ -299,13 +300,13 @@ class JSON
299 300 // non-inclusive end offsets of an object relative to its input
300 301 // string. Otherwise, both values are 0.
301 302 QPDF_DLL
302   - void setStart(size_t);
  303 + void setStart(qpdf_offset_t);
303 304 QPDF_DLL
304   - void setEnd(size_t);
  305 + void setEnd(qpdf_offset_t);
305 306 QPDF_DLL
306   - size_t getStart() const;
  307 + qpdf_offset_t getStart() const;
307 308 QPDF_DLL
308   - size_t getEnd() const;
  309 + qpdf_offset_t getEnd() const;
309 310  
310 311 private:
311 312 static std::string encode_string(std::string const& utf8);
... ... @@ -391,8 +392,8 @@ class JSON
391 392  
392 393 std::shared_ptr<JSON_value> value;
393 394 // start and end are only populated for objects created by parse
394   - size_t start;
395   - size_t end;
  395 + qpdf_offset_t start;
  396 + qpdf_offset_t end;
396 397 };
397 398  
398 399 std::shared_ptr<Members> m;
... ...
include/qpdf/QPDF.hh
... ... @@ -1046,7 +1046,7 @@ class QPDF
1046 1046 void nestedState(std::string const& key, JSON const& value, state_e);
1047 1047 void setObjectDescription(QPDFObjectHandle& oh, JSON const& value);
1048 1048 QPDFObjectHandle makeObject(JSON const& value);
1049   - void error(size_t offset, std::string const& message);
  1049 + void error(qpdf_offset_t offset, std::string const& message);
1050 1050 QPDFObjectHandle
1051 1051 reserveObject(int obj, int gen);
1052 1052 void replaceObject(
... ...
libqpdf/JSON.cc
... ... @@ -4,11 +4,19 @@
4 4 #include <qpdf/Pl_Base64.hh>
5 5 #include <qpdf/Pl_Concatenate.hh>
6 6 #include <qpdf/Pl_String.hh>
  7 +#include <qpdf/QIntC.hh>
7 8 #include <qpdf/QTC.hh>
8 9 #include <qpdf/QUtil.hh>
9 10 #include <cstring>
10 11 #include <stdexcept>
11 12  
  13 +template <typename T>
  14 +static qpdf_offset_t
  15 +toO(T const& i)
  16 +{
  17 + return QIntC::to_offset(i);
  18 +}
  19 +
12 20 JSON::Members::Members(std::shared_ptr<JSON_value> value) :
13 21 value(value),
14 22 start(0),
... ... @@ -591,13 +599,13 @@ namespace
591 599 void getToken();
592 600 void handleToken();
593 601 static std::string
594   - decode_string(std::string const& json, size_t offset);
  602 + decode_string(std::string const& json, qpdf_offset_t offset);
595 603 static void handle_u_code(
596 604 char const* s,
597   - size_t offset,
598   - size_t i,
  605 + qpdf_offset_t offset,
  606 + qpdf_offset_t i,
599 607 unsigned long& high_surrogate,
600   - size_t& high_offset,
  608 + qpdf_offset_t& high_offset,
601 609 std::string& result);
602 610  
603 611 enum parser_state_e {
... ... @@ -633,25 +641,25 @@ namespace
633 641 char buf[16384];
634 642 size_t bytes;
635 643 char const* p;
636   - size_t u_count;
637   - size_t offset;
  644 + qpdf_offset_t u_count;
  645 + qpdf_offset_t offset;
638 646 bool done;
639 647 std::string token;
640 648 parser_state_e parser_state;
641 649 std::vector<std::shared_ptr<JSON>> stack;
642 650 std::vector<parser_state_e> ps_stack;
643 651 std::string dict_key;
644   - size_t dict_key_offset;
  652 + qpdf_offset_t dict_key_offset;
645 653 };
646 654 } // namespace
647 655  
648 656 void
649 657 JSONParser::handle_u_code(
650 658 char const* s,
651   - size_t offset,
652   - size_t i,
  659 + qpdf_offset_t offset,
  660 + qpdf_offset_t i,
653 661 unsigned long& high_surrogate,
654   - size_t& high_offset,
  662 + qpdf_offset_t& high_offset,
655 663 std::string& result)
656 664 {
657 665 std::string hex = QUtil::hex_decode(std::string(s + i + 1, s + i + 5));
... ... @@ -662,14 +670,14 @@ JSONParser::handle_u_code(
662 670 codepoint += low;
663 671 if ((codepoint & 0xFC00) == 0xD800) {
664 672 // high surrogate
665   - size_t new_high_offset = offset + i;
  673 + qpdf_offset_t new_high_offset = offset + i;
666 674 if (high_offset) {
667 675 QTC::TC("libtests", "JSON 16 high high");
668 676 throw std::runtime_error(
669   - "JSON: offset " + QUtil::uint_to_string(new_high_offset) +
  677 + "JSON: offset " + QUtil::int_to_string(new_high_offset) +
670 678 ": UTF-16 high surrogate found after previous high surrogate"
671 679 " at offset " +
672   - QUtil::uint_to_string(high_offset));
  680 + QUtil::int_to_string(high_offset));
673 681 }
674 682 high_offset = new_high_offset;
675 683 high_surrogate = codepoint;
... ... @@ -678,7 +686,7 @@ JSONParser::handle_u_code(
678 686 if (offset + i != (high_offset + 6)) {
679 687 QTC::TC("libtests", "JSON 16 low not after high");
680 688 throw std::runtime_error(
681   - "JSON: offset " + QUtil::uint_to_string(offset + i) +
  689 + "JSON: offset " + QUtil::int_to_string(offset + i) +
682 690 ": UTF-16 low surrogate found not immediately after high"
683 691 " surrogate");
684 692 }
... ... @@ -692,7 +700,7 @@ JSONParser::handle_u_code(
692 700 }
693 701  
694 702 std::string
695   -JSONParser::decode_string(std::string const& str, size_t offset)
  703 +JSONParser::decode_string(std::string const& str, qpdf_offset_t offset)
696 704 {
697 705 // The string has already been validated when this private method
698 706 // is called, so errors are logic errors instead of runtime
... ... @@ -708,11 +716,12 @@ JSONParser::decode_string(std::string const& str, size_t offset)
708 716 len -= 2;
709 717 // Keep track of UTF-16 surrogate pairs.
710 718 unsigned long high_surrogate = 0;
711   - size_t high_offset = 0;
  719 + qpdf_offset_t high_offset = 0;
712 720 std::string result;
713   - for (size_t i = 0; i < len; ++i) {
  721 + qpdf_offset_t olen = toO(len);
  722 + for (qpdf_offset_t i = 0; i < olen; ++i) {
714 723 if (s[i] == '\\') {
715   - if (i + 1 >= len) {
  724 + if (i + 1 >= olen) {
716 725 throw std::logic_error("JSON parse: nothing after \\");
717 726 }
718 727 char ch = s[++i];
... ... @@ -740,7 +749,7 @@ JSONParser::decode_string(std::string const& str, size_t offset)
740 749 result.append(1, '\t');
741 750 break;
742 751 case 'u':
743   - if (i + 4 >= len) {
  752 + if (i + 4 >= olen) {
744 753 throw std::logic_error(
745 754 "JSON parse: not enough characters after \\u");
746 755 }
... ... @@ -759,7 +768,7 @@ JSONParser::decode_string(std::string const& str, size_t offset)
759 768 if (high_offset) {
760 769 QTC::TC("libtests", "JSON 16 dangling high");
761 770 throw std::runtime_error(
762   - "JSON: offset " + QUtil::uint_to_string(high_offset) +
  771 + "JSON: offset " + QUtil::int_to_string(high_offset) +
763 772 ": UTF-16 high surrogate not followed by low surrogate");
764 773 }
765 774 return result;
... ... @@ -785,7 +794,7 @@ JSONParser::getToken()
785 794 QTC::TC("libtests", "JSON parse null character");
786 795 throw std::runtime_error(
787 796 "JSON: null character at offset " +
788   - QUtil::uint_to_string(offset));
  797 + QUtil::int_to_string(offset));
789 798 }
790 799 action = append;
791 800 switch (lex_state) {
... ... @@ -822,7 +831,7 @@ JSONParser::getToken()
822 831 } else {
823 832 QTC::TC("libtests", "JSON parse bad character");
824 833 throw std::runtime_error(
825   - "JSON: offset " + QUtil::uint_to_string(offset) +
  834 + "JSON: offset " + QUtil::int_to_string(offset) +
826 835 ": unexpected character " + std::string(p, 1));
827 836 }
828 837 break;
... ... @@ -840,12 +849,12 @@ JSONParser::getToken()
840 849 if (number_saw_e) {
841 850 QTC::TC("libtests", "JSON parse point after e");
842 851 throw std::runtime_error(
843   - "JSON: offset " + QUtil::uint_to_string(offset) +
  852 + "JSON: offset " + QUtil::int_to_string(offset) +
844 853 ": numeric literal: decimal point after e");
845 854 } else if (number_saw_point) {
846 855 QTC::TC("libtests", "JSON parse duplicate point");
847 856 throw std::runtime_error(
848   - "JSON: offset " + QUtil::uint_to_string(offset) +
  857 + "JSON: offset " + QUtil::int_to_string(offset) +
849 858 ": numeric literal: decimal point already seen");
850 859 } else {
851 860 number_saw_point = true;
... ... @@ -854,7 +863,7 @@ JSONParser::getToken()
854 863 if (number_saw_e) {
855 864 QTC::TC("libtests", "JSON parse duplicate e");
856 865 throw std::runtime_error(
857   - "JSON: offset " + QUtil::uint_to_string(offset) +
  866 + "JSON: offset " + QUtil::int_to_string(offset) +
858 867 ": numeric literal: e already seen");
859 868 } else {
860 869 number_saw_e = true;
... ... @@ -865,7 +874,7 @@ JSONParser::getToken()
865 874 } else {
866 875 QTC::TC("libtests", "JSON parse unexpected sign");
867 876 throw std::runtime_error(
868   - "JSON: offset " + QUtil::uint_to_string(offset) +
  877 + "JSON: offset " + QUtil::int_to_string(offset) +
869 878 ": numeric literal: unexpected sign");
870 879 }
871 880 } else if (QUtil::is_space(*p)) {
... ... @@ -877,7 +886,7 @@ JSONParser::getToken()
877 886 } else {
878 887 QTC::TC("libtests", "JSON parse numeric bad character");
879 888 throw std::runtime_error(
880   - "JSON: offset " + QUtil::uint_to_string(offset) +
  889 + "JSON: offset " + QUtil::int_to_string(offset) +
881 890 ": numeric literal: unexpected character " +
882 891 std::string(p, 1));
883 892 }
... ... @@ -895,7 +904,7 @@ JSONParser::getToken()
895 904 } else {
896 905 QTC::TC("libtests", "JSON parse keyword bad character");
897 906 throw std::runtime_error(
898   - "JSON: offset " + QUtil::uint_to_string(offset) +
  907 + "JSON: offset " + QUtil::int_to_string(offset) +
899 908 ": keyword: unexpected character " + std::string(p, 1));
900 909 }
901 910 break;
... ... @@ -918,7 +927,7 @@ JSONParser::getToken()
918 927 } else {
919 928 QTC::TC("libtests", "JSON parse backslash bad character");
920 929 throw std::runtime_error(
921   - "JSON: offset " + QUtil::uint_to_string(offset) +
  930 + "JSON: offset " + QUtil::int_to_string(offset) +
922 931 ": invalid character after backslash: " +
923 932 std::string(p, 1));
924 933 }
... ... @@ -929,7 +938,7 @@ JSONParser::getToken()
929 938 QTC::TC("libtests", "JSON parse bad hex after u");
930 939 throw std::runtime_error(
931 940 "JSON: offset " +
932   - QUtil::uint_to_string(offset - u_count - 1) +
  941 + QUtil::int_to_string(offset - u_count - 1) +
933 942 ": \\u must be followed by four hex digits");
934 943 }
935 944 if (++u_count == 4) {
... ... @@ -969,14 +978,14 @@ JSONParser::getToken()
969 978 QTC::TC("libtests", "JSON parse premature end of u");
970 979 throw std::runtime_error(
971 980 "JSON: offset " +
972   - QUtil::uint_to_string(offset - u_count - 1) +
  981 + QUtil::int_to_string(offset - u_count - 1) +
973 982 ": \\u must be followed by four characters");
974 983  
975 984 case ls_string:
976 985 case ls_backslash:
977 986 QTC::TC("libtests", "JSON parse unterminated string");
978 987 throw std::runtime_error(
979   - "JSON: offset " + QUtil::uint_to_string(offset) +
  988 + "JSON: offset " + QUtil::int_to_string(offset) +
980 989 ": unterminated string");
981 990 break;
982 991 }
... ... @@ -994,7 +1003,7 @@ JSONParser::handleToken()
994 1003 if (parser_state == ps_done) {
995 1004 QTC::TC("libtests", "JSON parse junk after object");
996 1005 throw std::runtime_error(
997   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1006 + "JSON: offset " + QUtil::int_to_string(offset) +
998 1007 ": material follows end of object: " + token);
999 1008 }
1000 1009  
... ... @@ -1005,7 +1014,7 @@ JSONParser::handleToken()
1005 1014 if (token.length() < 2) {
1006 1015 throw std::logic_error("JSON string length < 2");
1007 1016 }
1008   - s_value = decode_string(token, offset - token.length());
  1017 + s_value = decode_string(token, offset - toO(token.length()));
1009 1018 }
1010 1019 // Based on the lexical state and value, figure out whether we are
1011 1020 // looking at an item or a delimiter. It will always be exactly
... ... @@ -1020,12 +1029,12 @@ JSONParser::handleToken()
1020 1029 switch (first_char) {
1021 1030 case '{':
1022 1031 item = std::make_shared<JSON>(JSON::makeDictionary());
1023   - item->setStart(offset - token.length());
  1032 + item->setStart(offset - toO(token.length()));
1024 1033 break;
1025 1034  
1026 1035 case '[':
1027 1036 item = std::make_shared<JSON>(JSON::makeArray());
1028   - item->setStart(offset - token.length());
  1037 + item->setStart(offset - toO(token.length()));
1029 1038 break;
1030 1039  
1031 1040 default:
... ... @@ -1038,7 +1047,7 @@ JSONParser::handleToken()
1038 1047 if (number_saw_point && (number_after_point == 0)) {
1039 1048 QTC::TC("libtests", "JSON parse decimal with no digits");
1040 1049 throw std::runtime_error(
1041   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1050 + "JSON: offset " + QUtil::int_to_string(offset) +
1042 1051 ": decimal point with no digits");
1043 1052 }
1044 1053 if ((number_before_point > 1) &&
... ... @@ -1046,13 +1055,13 @@ JSONParser::handleToken()
1046 1055 ((first_char == '-') && (token.at(1) == '0')))) {
1047 1056 QTC::TC("libtests", "JSON parse leading zero");
1048 1057 throw std::runtime_error(
1049   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1058 + "JSON: offset " + QUtil::int_to_string(offset) +
1050 1059 ": number with leading zero");
1051 1060 }
1052 1061 if ((number_before_point == 0) && (number_after_point == 0)) {
1053 1062 QTC::TC("libtests", "JSON parse number no digits");
1054 1063 throw std::runtime_error(
1055   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1064 + "JSON: offset " + QUtil::int_to_string(offset) +
1056 1065 ": number with no digits");
1057 1066 }
1058 1067 item = std::make_shared<JSON>(JSON::makeNumber(token));
... ... @@ -1068,7 +1077,7 @@ JSONParser::handleToken()
1068 1077 } else {
1069 1078 QTC::TC("libtests", "JSON parse invalid keyword");
1070 1079 throw std::runtime_error(
1071   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1080 + "JSON: offset " + QUtil::int_to_string(offset) +
1072 1081 ": invalid keyword " + token);
1073 1082 }
1074 1083 break;
... ... @@ -1101,21 +1110,21 @@ JSONParser::handleToken()
1101 1110 case ps_dict_after_key:
1102 1111 QTC::TC("libtests", "JSON parse expected colon");
1103 1112 throw std::runtime_error(
1104   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1113 + "JSON: offset " + QUtil::int_to_string(offset) +
1105 1114 ": expected ':'");
1106 1115 break;
1107 1116  
1108 1117 case ps_dict_after_item:
1109 1118 QTC::TC("libtests", "JSON parse expected , or }");
1110 1119 throw std::runtime_error(
1111   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1120 + "JSON: offset " + QUtil::int_to_string(offset) +
1112 1121 ": expected ',' or '}'");
1113 1122 break;
1114 1123  
1115 1124 case ps_array_after_item:
1116 1125 QTC::TC("libtests", "JSON parse expected, or ]");
1117 1126 throw std::runtime_error(
1118   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1127 + "JSON: offset " + QUtil::int_to_string(offset) +
1119 1128 ": expected ',' or ']'");
1120 1129 break;
1121 1130  
... ... @@ -1124,7 +1133,7 @@ JSONParser::handleToken()
1124 1133 if (lex_state != ls_string) {
1125 1134 QTC::TC("libtests", "JSON parse string as dict key");
1126 1135 throw std::runtime_error(
1127   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1136 + "JSON: offset " + QUtil::int_to_string(offset) +
1128 1137 ": expect string as dictionary key");
1129 1138 }
1130 1139 break;
... ... @@ -1143,7 +1152,7 @@ JSONParser::handleToken()
1143 1152 {
1144 1153 QTC::TC("libtests", "JSON parse unexpected }");
1145 1154 throw std::runtime_error(
1146   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1155 + "JSON: offset " + QUtil::int_to_string(offset) +
1147 1156 ": unexpected dictionary end delimiter");
1148 1157 }
1149 1158 } else if (delimiter == ']') {
... ... @@ -1153,14 +1162,14 @@ JSONParser::handleToken()
1153 1162 {
1154 1163 QTC::TC("libtests", "JSON parse unexpected ]");
1155 1164 throw std::runtime_error(
1156   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1165 + "JSON: offset " + QUtil::int_to_string(offset) +
1157 1166 ": unexpected array end delimiter");
1158 1167 }
1159 1168 } else if (delimiter == ':') {
1160 1169 if (parser_state != ps_dict_after_key) {
1161 1170 QTC::TC("libtests", "JSON parse unexpected :");
1162 1171 throw std::runtime_error(
1163   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1172 + "JSON: offset " + QUtil::int_to_string(offset) +
1164 1173 ": unexpected colon");
1165 1174 }
1166 1175 } else if (delimiter == ',') {
... ... @@ -1168,7 +1177,7 @@ JSONParser::handleToken()
1168 1177 (parser_state == ps_array_after_item))) {
1169 1178 QTC::TC("libtests", "JSON parse unexpected ,");
1170 1179 throw std::runtime_error(
1171   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1180 + "JSON: offset " + QUtil::int_to_string(offset) +
1172 1181 ": unexpected comma");
1173 1182 }
1174 1183 } else if (delimiter != '\0') {
... ... @@ -1206,7 +1215,7 @@ JSONParser::handleToken()
1206 1215 "JSONParser::handleToken: unexpected delimiter in transition");
1207 1216 } else if (item.get()) {
1208 1217 if (!(item->isArray() || item->isDictionary())) {
1209   - item->setStart(offset - token.length());
  1218 + item->setStart(offset - toO(token.length()));
1210 1219 item->setEnd(offset);
1211 1220 }
1212 1221  
... ... @@ -1227,7 +1236,7 @@ JSONParser::handleToken()
1227 1236 if (tos->checkDictionaryKeySeen(dict_key)) {
1228 1237 QTC::TC("libtests", "JSON parse duplicate key");
1229 1238 throw std::runtime_error(
1230   - "JSON: offset " + QUtil::uint_to_string(dict_key_offset) +
  1239 + "JSON: offset " + QUtil::int_to_string(dict_key_offset) +
1231 1240 ": duplicated dictionary key");
1232 1241 }
1233 1242 if (!reactor || !reactor->dictionaryItem(dict_key, *item)) {
... ... @@ -1288,7 +1297,7 @@ JSONParser::handleToken()
1288 1297 }
1289 1298 if (ps_stack.size() > 500) {
1290 1299 throw std::runtime_error(
1291   - "JSON: offset " + QUtil::uint_to_string(offset) +
  1300 + "JSON: offset " + QUtil::int_to_string(offset) +
1292 1301 ": maximum object depth exceeded");
1293 1302 }
1294 1303 parser_state = next_state;
... ... @@ -1329,24 +1338,24 @@ JSON::parse(std::string const& s)
1329 1338 }
1330 1339  
1331 1340 void
1332   -JSON::setStart(size_t start)
  1341 +JSON::setStart(qpdf_offset_t start)
1333 1342 {
1334 1343 this->m->start = start;
1335 1344 }
1336 1345  
1337 1346 void
1338   -JSON::setEnd(size_t end)
  1347 +JSON::setEnd(qpdf_offset_t end)
1339 1348 {
1340 1349 this->m->end = end;
1341 1350 }
1342 1351  
1343   -size_t
  1352 +qpdf_offset_t
1344 1353 JSON::getStart() const
1345 1354 {
1346 1355 return this->m->start;
1347 1356 }
1348 1357  
1349   -size_t
  1358 +qpdf_offset_t
1350 1359 JSON::getEnd() const
1351 1360 {
1352 1361 return this->m->end;
... ...
libqpdf/QPDF_json.cc
... ... @@ -197,14 +197,14 @@ QPDF::test_json_validators()
197 197 }
198 198  
199 199 static std::function<void(Pipeline*)>
200   -provide_data(std::shared_ptr<InputSource> is, size_t start, size_t end)
  200 +provide_data(std::shared_ptr<InputSource> is, qpdf_offset_t start, qpdf_offset_t end)
201 201 {
202 202 return [is, start, end](Pipeline* p) {
203 203 Pl_Base64 decode("base64-decode", p, Pl_Base64::a_decode);
204 204 p = &decode;
205   - size_t bytes = end - start;
  205 + size_t bytes = QIntC::to_size(end - start);
206 206 char buf[8192];
207   - is->seek(QIntC::to_offset(start), SEEK_SET);
  207 + is->seek(start, SEEK_SET);
208 208 size_t len = 0;
209 209 while ((len = is->read(buf, std::min(bytes, sizeof(buf)))) > 0) {
210 210 p->write(buf, len);
... ... @@ -241,14 +241,14 @@ QPDF::JSONReactor::JSONReactor(
241 241 }
242 242  
243 243 void
244   -QPDF::JSONReactor::error(size_t offset, std::string const& msg)
  244 +QPDF::JSONReactor::error(qpdf_offset_t offset, std::string const& msg)
245 245 {
246 246 this->errors = true;
247 247 std::string object = this->cur_object;
248 248 if (is->getName() != pdf.getFilename()) {
249 249 object += " from " + is->getName();
250 250 }
251   - this->pdf.warn(qpdf_e_json, object, QIntC::to_offset(offset), msg);
  251 + this->pdf.warn(qpdf_e_json, object, offset, msg);
252 252 }
253 253  
254 254 bool
... ... @@ -616,7 +616,7 @@ QPDF::JSONReactor::setObjectDescription(QPDFObjectHandle& oh, JSON const& value)
616 616 if (!this->cur_object.empty()) {
617 617 description += ", " + this->cur_object;
618 618 }
619   - description += " at offset " + QUtil::uint_to_string(value.getStart());
  619 + description += " at offset " + QUtil::int_to_string(value.getStart());
620 620 oh.setObjectDescription(&this->pdf, description);
621 621 }
622 622  
... ...