// JSON.hh
// Copyright (c) 2005-2022 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.

#ifndef JSON_HH
#define JSON_HH

// This is a simple JSON serializer and parser, primarily designed for
// serializing QPDF Objects as JSON. While it may work as a
// general-purpose JSON parser/serializer, there are better options.
// JSON objects contain their data as smart pointers. When one JSON object
// is added to another, this pointer is copied. This means you can
// create temporary JSON objects on the stack, add them to other
// objects, and let them go out of scope safely. It also means that if
// a JSON object is added in more than one place, all copies
// share the underlying data. This makes them similar in structure and
// behavior to QPDFObjectHandle and may feel natural within the QPDF
// codebase, but it is also a good reason not to use this as a
// general-purpose JSON package.

#include <qpdf/DLL.h>
#include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785)
#include <qpdf/Types.h>

#include <functional>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

class Pipeline;
class InputSource;

// A handle to a JSON value. The data is held through a smart pointer,
// so copying a JSON object copies the pointer and all copies share
// the same underlying data.
class JSON
{
  public:
    // NOTE(review): presumably the most recent version of qpdf's JSON
    // output format -- confirm against the manual.
    static int constexpr LATEST = 2;

    // Return this JSON value serialized as a string.
    QPDF_DLL
    std::string unparse() const;

    // Write the JSON object through a pipeline. The `depth` parameter
    // specifies how deeply nested this is in another JSON structure,
    // which makes it possible to write clean-looking JSON
    // incrementally.
    QPDF_DLL
    void write(Pipeline*, size_t depth = 0) const;

    // Helper methods for writing JSON incrementally.
    //
    // "first" -- Several methods take a `bool& first` parameter. The
    // open methods always set it to true, and the methods to output
    // items always set it to false. This way, the item and close
    // methods can always know whether or not a first item is being
    // written. The intended mode of operation is to start with a new
    // `bool first = true` each time a new container is opened and
    // to pass that `first` through to all the methods that are
    // called to add top-level items to the container as well as to
    // close the container. This lets the JSON object use it to keep
    // track of when it's writing a first object and when it's not. If
    // incrementally writing multiple levels of depth, a new `first`
    // should be used for each new container that is opened.
    //
    // "depth" -- Indicate the level of depth. This is used for
    // consistent indentation. When writing incrementally, whenever
    // you call a method to add an item to a container, the value of
    // `depth` should be one more than whatever value is passed to the
    // container open and close methods.

    // Open methods ignore the incoming value of first and set it to
    // true, marking the newly opened container as empty so far.
    QPDF_DLL
    static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0);
    QPDF_DLL
    static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0);
    // Close methods don't modify first. A true value indicates that
    // we are closing an empty object.
    QPDF_DLL
    static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0);
    QPDF_DLL
    static void writeArrayClose(Pipeline*, bool first, size_t depth = 0);
    // The item methods use the value of first to determine if this is
    // the first item and always set it to false.
    QPDF_DLL
    static void writeDictionaryItem(
        Pipeline*,
        bool& first,
        std::string const& key,
        JSON const& value,
        size_t depth = 0);
    // Write just the key of a new dictionary item, useful if writing
    // nested structures. Calls writeNext.
    QPDF_DLL
    static void writeDictionaryKey(
        Pipeline* p, bool& first, std::string const& key, size_t depth = 0);
    QPDF_DLL
    static void writeArrayItem(
        Pipeline*, bool& first, JSON const& element, size_t depth = 0);
    // If writing nested structures incrementally, call writeNext
    // before opening a new array or container in the midst of an
    // existing one. The `first` you pass to writeNext should be the
    // one for the parent object. The depth should be the one for the
    // child object. Then start a new `first` for the nested item.
    // Note that writeDictionaryKey and writeArrayItem call writeNext
    // for you, so this is most important when writing subsequent
    // items or container openers to an array.
    QPDF_DLL
    static void writeNext(Pipeline* p, bool& first, size_t depth = 0);

    // The JSON spec calls dictionaries "objects", but that creates
    // too much confusion when referring to instances of the JSON
    // class.
    QPDF_DLL
    static JSON makeDictionary();
    // addDictionaryMember returns the newly added item.
    QPDF_DLL
    JSON addDictionaryMember(std::string const& key, JSON const&);
    QPDF_DLL
    static JSON makeArray();
    // addArrayElement returns the newly added item.
    QPDF_DLL
    JSON addArrayElement(JSON const&);
    QPDF_DLL
    static JSON makeString(std::string const& utf8);
    QPDF_DLL
    static JSON makeInt(long long int value);
    QPDF_DLL
    static JSON makeReal(double value);
    // NOTE(review): `encoded` is presumably the textual form of the
    // number exactly as it should appear in the output -- confirm.
    QPDF_DLL
    static JSON makeNumber(std::string const& encoded);
    QPDF_DLL
    static JSON makeBool(bool value);
    QPDF_DLL
    static JSON makeNull();

    // A blob serializes as a string. The function will be called by
    // JSON with a pipeline and should write binary data to the
    // pipeline but not call finish(). JSON will call finish() at the
    // right time.
    QPDF_DLL
    static JSON makeBlob(std::function<void(Pipeline*)>);

    QPDF_DLL
    bool isArray() const;

    QPDF_DLL
    bool isDictionary() const;

    // If the key is already in the dictionary, return true.
    // Otherwise, mark it as seen and return false. This is primarily
    // intended to be used by the parser to detect duplicate keys when
    // the reactor blocks them from being added to the final
    // dictionary.
    QPDF_DLL
    bool checkDictionaryKeySeen(std::string const& key);

    // Accessors. Accessor behavior:
    //
    // - If argument is wrong type, including null, return false
    // - If argument is right type, return true and initialize the value
    QPDF_DLL
    bool getString(std::string& utf8) const;
    QPDF_DLL
    bool getNumber(std::string& value) const;
    QPDF_DLL
    bool getBool(bool& value) const;
    QPDF_DLL
    bool isNull() const;
    QPDF_DLL
    bool forEachDictItem(
        std::function<void(std::string const& key, JSON value)> fn) const;
    QPDF_DLL
    bool forEachArrayItem(std::function<void(JSON value)> fn) const;

    // Check this JSON object against a "schema". This is not a schema
    // according to any standard. It's just a template of what the
    // JSON is supposed to contain. The checking does the following:
    //
    //   * The schema is a nested structure containing dictionaries,
    //     single-element arrays, and strings only.
    //   * Recursively walk the schema. In the items below, "schema
    //     object" refers to an object in the schema, and "checked
    //     object" refers to the corresponding part of the object
    //     being checked.
    //   * If the schema object is a dictionary, the checked object
    //     must have a dictionary in the same place with the same
    //     keys. If flags contains f_optional, a key in the schema
    //     does not have to be present in the object. Otherwise, all
    //     keys have to be present. Any key in the object must be
    //     present in the schema.
    //   * If the schema object is an array of length 1, the checked
    //     object may either be a single item or an array of items.
    //     The single item or each element of the checked object's
    //     array is validated against the single element of the
    //     schema's array. The rationale behind this logic is that a
    //     single element may appear wherever the schema allows a
    //     variable-length array. This makes it possible to start
    //     allowing an array in the future where a single element was
    //     previously required without breaking backward
    //     compatibility.
    //   * If the schema object is an array of length > 1, the checked
    //     object must be an array of the same length. In this case,
    //     each element of the checked object array is validated
    //     against the corresponding element of the schema array.
    //   * Otherwise, the value must be a string whose value is a
    //     description of the object's corresponding value, which may
    //     have any type.
    //
    // QPDF's JSON output conforms to certain strict compatibility
    // rules as discussed in the manual. The idea is that a JSON
    // structure created manually in qpdf.cc doubles as both JSON help
    // information and a schema for validating the JSON that qpdf
    // generates. Any discrepancies are a bug in qpdf.
    //
    // Flags is a bitwise or of values from check_flags_e.
    enum check_flags_e {
        f_none = 0,
        f_optional = 1 << 0,
    };
    QPDF_DLL
    bool checkSchema(
        JSON schema, unsigned long flags, std::list<std::string>& errors);

    // Same as passing 0 for flags
    QPDF_DLL
    bool checkSchema(JSON schema, std::list<std::string>& errors);

    // A pointer to a Reactor class can be passed to parse, which
    // will enable the caller to react to incremental events in the
    // construction of the JSON object. This makes it possible to
    // implement SAX-like handling of very large JSON objects.
    class QPDF_DLL_CLASS Reactor
    {
      public:
        QPDF_DLL
        virtual ~Reactor() = default;

        // The start/end methods are called when parsing of a
        // dictionary or array is started or ended. The item methods
        // are called when an item is added to a dictionary or array.
        // When adding a container to another container, the item
        // method is called with an empty container before the lower
        // container's start method is called. See important notes in
        // "Item methods" below.

        // During parsing of a JSON string, the parser is operating on
        // a single object at a time. When a dictionary or array is
        // started, a new context begins, and when that dictionary or
        // array is ended, the previous context is resumed. So, for
        // example, if you have `{"a": [1]}`, you will receive the
        // following method calls
        //
        // dictionaryStart -- current object is the top-level dictionary
        // dictionaryItem  -- called with "a" and an empty array
        // arrayStart      -- current object is the array
        // arrayItem       -- called with the "1" object
        // containerEnd    -- now current object is the dictionary again
        // containerEnd    -- current object is undefined
        //
        // If the top-level item in a JSON string is a scalar, the
        // topLevelScalar() method will be called. No argument is
        // passed since the object is the same as what is returned by
        // parse().

        QPDF_DLL
        virtual void dictionaryStart() = 0;
        QPDF_DLL
        virtual void arrayStart() = 0;
        QPDF_DLL
        virtual void containerEnd(JSON const& value) = 0;
        QPDF_DLL
        virtual void topLevelScalar() = 0;

        // Item methods:
        //
        // The return value of the item methods indicates whether the
        // item has been "consumed". If the item method returns true,
        // then the item will not be added to the containing JSON
        // object. This is what allows arbitrarily large JSON objects
        // to be parsed and not have to be kept in memory.
        //
        // NOTE: When a dictionary or an array is added to a
        // container, the dictionaryItem or arrayItem method is called
        // when the child item's start delimiter is encountered, so
        // the JSON object passed in at that time will always be in
        // its initial, empty state. Additionally, the child item's
        // start method is not called until after the parent item's
        // item method is called. This makes it possible to keep track
        // of the current depth level by incrementing level on start
        // methods and decrementing on end methods.

        QPDF_DLL
        virtual bool
        dictionaryItem(std::string const& key, JSON const& value) = 0;
        QPDF_DLL
        virtual bool arrayItem(JSON const& value) = 0;
    };

    // Create a JSON object from a string.
    QPDF_DLL
    static JSON parse(std::string const&);
    // Create a JSON object from an input source. See above for
    // information about how to use the Reactor.
    QPDF_DLL
    static JSON parse(InputSource&, Reactor* reactor = nullptr);

    // parse calls setStart and setEnd to set the inclusive start and
    // non-inclusive end offsets of an object relative to its input
    // string. Otherwise, both values are 0.
    QPDF_DLL
    void setStart(qpdf_offset_t);
    QPDF_DLL
    void setEnd(qpdf_offset_t);
    QPDF_DLL
    qpdf_offset_t getStart() const;
    QPDF_DLL
    qpdf_offset_t getEnd() const;

  private:
    static std::string encode_string(std::string const& utf8);
    // NOTE(review): presumably the shared implementation behind
    // writeDictionaryClose and writeArrayClose, with `delimiter`
    // being the closing bracket to emit -- confirm in JSON.cc.
    static void
    writeClose(Pipeline* p, bool first, size_t depth, char const* delimiter);

    // Abstract base of the concrete value types below. Each concrete
    // type supplies its own write(); the overrides are marked
    // `override` so that any future change to this signature is
    // caught at compile time instead of silently adding a new
    // virtual function.
    struct JSON_value
    {
        virtual ~JSON_value() = default;
        virtual void write(Pipeline*, size_t depth) const = 0;
    };
    struct JSON_dictionary: public JSON_value
    {
        ~JSON_dictionary() override = default;
        void write(Pipeline*, size_t depth) const override;
        // Members keyed by name; std::map keeps them ordered by key.
        std::map<std::string, std::shared_ptr<JSON_value>> members;
        // NOTE(review): presumably the bookkeeping behind
        // checkDictionaryKeySeen -- confirm in JSON.cc.
        std::set<std::string> parsed_keys;
    };
    struct JSON_array: public JSON_value
    {
        ~JSON_array() override = default;
        void write(Pipeline*, size_t depth) const override;
        std::vector<std::shared_ptr<JSON_value>> elements;
    };
    struct JSON_string: public JSON_value
    {
        JSON_string(std::string const& utf8);
        ~JSON_string() override = default;
        void write(Pipeline*, size_t depth) const override;
        std::string utf8;
        // NOTE(review): presumably the result of encode_string(utf8)
        // -- confirm in JSON.cc.
        std::string encoded;
    };
    struct JSON_number: public JSON_value
    {
        JSON_number(long long val);
        JSON_number(double val);
        JSON_number(std::string const& val);
        ~JSON_number() override = default;
        void write(Pipeline*, size_t depth) const override;
        // Numbers are held in string form regardless of which
        // constructor was used.
        std::string encoded;
    };
    struct JSON_bool: public JSON_value
    {
        JSON_bool(bool val);
        ~JSON_bool() override = default;
        void write(Pipeline*, size_t depth) const override;
        bool value;
    };
    struct JSON_null: public JSON_value
    {
        ~JSON_null() override = default;
        void write(Pipeline*, size_t depth) const override;
    };
    struct JSON_blob: public JSON_value
    {
        JSON_blob(std::function<void(Pipeline*)> fn);
        ~JSON_blob() override = default;
        void write(Pipeline*, size_t depth) const override;
        // Same callback type that makeBlob accepts.
        std::function<void(Pipeline*)> fn;
    };

    JSON(std::shared_ptr<JSON_value>);

    static bool checkSchemaInternal(
        JSON_value* this_v,
        JSON_value* sch_v,
        unsigned long flags,
        std::list<std::string>& errors,
        std::string prefix);

    // Private data of a JSON object, held through a shared pointer so
    // that copies of a JSON object share it. Not copyable itself.
    class Members
    {
        friend class JSON;

      public:
        QPDF_DLL
        ~Members() = default;

      private:
        Members(std::shared_ptr<JSON_value>);
        Members(Members const&) = delete;

        std::shared_ptr<JSON_value> value;
        // start and end are only populated for objects created by
        // parse; the in-class initializers guarantee the documented
        // default of 0 otherwise.
        qpdf_offset_t start{0};
        qpdf_offset_t end{0};
    };

    std::shared_ptr<Members> m;
};

#endif // JSON_HH