// JSON.hh
// Copyright (c) 2005-2021 Jay Berkenbilt
// Copyright (c) 2022-2025 Jay Berkenbilt and Manfred Holger
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under
// the License.
//
// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
// License. At your option, you may continue to consider qpdf to be licensed under those terms.
// Please see the manual for additional information.

#ifndef JSON_HH
#define JSON_HH

// This is a simple JSON serializer and parser, primarily designed for serializing QPDF Objects as
// JSON. While it may work as a general-purpose JSON parser/serializer, there are better options.
// JSON objects contain their data as smart pointers. When one JSON object is added to another, this
// pointer is copied. This means you can create temporary JSON objects on the stack, add them to
// other objects, and let them go out of scope safely. It also means that if a JSON object is added
// in more than one place, all copies share the underlying data. This makes them similar in
// structure and behavior to QPDFObjectHandle and may feel natural within the QPDF codebase, but it
// is also a good reason not to use this as a general-purpose JSON package.

#include <qpdf/DLL.h>
#include <qpdf/Types.h>

#include <functional>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>

// Forward declarations. This header only names these types as pointer or reference parameters
// (Pipeline* in the write methods and the blob callback, InputSource& in parse), so their full
// definitions are not needed here.
class Pipeline;
class InputSource;

// A JSON object is a handle to a JSON value. All of its state lives in a Members object that is
// held through a std::shared_ptr, so copying a JSON copies the pointer, and every copy refers to
// the same underlying data.
class JSON
{
  public:
    // NOTE(review): presumably the most recent JSON output format version -- confirm against the
    // code that uses this constant.
    static int constexpr LATEST = 2;

    // Default construction leaves the shared Members pointer empty.
    QPDF_DLL
    JSON() = default;

    // Return this object as a std::string. NOTE(review): presumably the same serialization that
    // write() sends through a pipeline -- confirm in the implementation.
    QPDF_DLL
    std::string unparse() const;

    // Write the JSON object through a pipeline. The `depth` parameter specifies how deeply nested
    // this is in another JSON structure, which makes it possible to write clean-looking JSON
    // incrementally.
    QPDF_DLL
    void write(Pipeline*, size_t depth = 0) const;

    // Helper methods for writing JSON incrementally.
    //
    // "first" -- Several methods take a `bool& first` parameter. The open methods always set it to
    // true, and the methods to output items always set it to false. This way, the item and close
    // methods can always know whether or not a first item is being written. The intended mode of
    // operation is to start with a new `bool first = true` each time a new container is opened and
    // to pass that `first` through to all the methods that are called to add top-level items to the
    // container as well as to close the container. This lets the JSON object use it to keep track
    // of when it's writing a first object and when it's not. If incrementally writing multiple
    // levels of depth, a new `first` should be used for each new container that is opened.
    //
    // "depth" -- Indicate the level of depth. This is used for consistent indentation. When writing
    // incrementally, whenever you call a method to add an item to a container, the value of `depth`
    // should be one more than whatever value is passed to the container open and close methods.

    // Open methods ignore the incoming value of first and set it to true, since a newly opened
    // container has not had any items written to it yet.
    QPDF_DLL
    static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0);
    QPDF_DLL
    static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0);
    // Close methods don't modify first. A true value indicates that we are closing an empty object.
    QPDF_DLL
    static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0);
    QPDF_DLL
    static void writeArrayClose(Pipeline*, bool first, size_t depth = 0);
    // The item methods use the value of first to determine if this is the first item and always set
    // it to false.
    QPDF_DLL
    static void writeDictionaryItem(
        Pipeline*, bool& first, std::string const& key, JSON const& value, size_t depth = 0);
    // Write just the key of a new dictionary item, useful if writing nested structures. Calls
    // writeNext.
    QPDF_DLL
    static void
    writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth = 0);
    QPDF_DLL
    static void writeArrayItem(Pipeline*, bool& first, JSON const& element, size_t depth = 0);
    // If writing nested structures incrementally, call writeNext before opening a new array or
    // dictionary in the midst of an existing one. The `first` you pass to writeNext should be the
    // one for the parent object. The depth should be the one for the child object. Then start a new
    // `first` for the nested item. Note that writeDictionaryKey and writeArrayItem call writeNext
    // for you, so this is most important when writing subsequent items or container openers to an
    // array.
    QPDF_DLL
    static void writeNext(Pipeline* p, bool& first, size_t depth = 0);

    // The JSON spec calls dictionaries "objects", but that creates too much confusion when
    // referring to instances of the JSON class.
    QPDF_DLL
    static JSON makeDictionary();
    // addDictionaryMember returns the newly added item.
    QPDF_DLL
    JSON addDictionaryMember(std::string const& key, JSON const&);
    QPDF_DLL
    static JSON makeArray();
    // addArrayElement returns the newly added item.
    QPDF_DLL
    JSON addArrayElement(JSON const&);
    // Factories for scalar values. makeNumber takes the number already encoded as text.
    QPDF_DLL
    static JSON makeString(std::string const& utf8);
    QPDF_DLL
    static JSON makeInt(long long int value);
    QPDF_DLL
    static JSON makeReal(double value);
    QPDF_DLL
    static JSON makeNumber(std::string const& encoded);
    QPDF_DLL
    static JSON makeBool(bool value);
    QPDF_DLL
    static JSON makeNull();

    // A blob serializes as a string. The function will be called by JSON with a pipeline and should
    // write binary data to the pipeline but not call finish(). JSON will call finish() at the right
    // time.
    QPDF_DLL
    static JSON makeBlob(std::function<void(Pipeline*)>);

    // Type queries for the two container types.
    QPDF_DLL
    bool isArray() const;

    QPDF_DLL
    bool isDictionary() const;

    // Accessors. Accessor behavior:
    //
    // - If argument is wrong type, including null, return false
    // - If argument is right type, return true and initialize the value
    QPDF_DLL
    bool getString(std::string& utf8) const;
    QPDF_DLL
    bool getNumber(std::string& value) const;
    QPDF_DLL
    bool getBool(bool& value) const;
    QPDF_DLL
    bool isNull() const;
    QPDF_DLL
    JSON getDictItem(std::string const& key) const;
    // The forEach methods take a callback that receives each item of the container.
    // NOTE(review): the bool result presumably follows the accessor convention above (false when
    // this object is not the matching container type) -- confirm in the implementation.
    QPDF_DLL
    bool forEachDictItem(std::function<void(std::string const& key, JSON value)> fn) const;
    QPDF_DLL
    bool forEachArrayItem(std::function<void(JSON value)> fn) const;

    // Check this JSON object against a "schema". This is not a schema according to any standard.
    // It's just a template of what the JSON is supposed to contain. The checking does the
    // following:
    //
    //   * The schema is a nested structure containing dictionaries, single-element arrays, and
    //     strings only.
    //   * Recursively walk the schema. In the items below, "schema object" refers to an object in
    //     the schema, and "checked object" refers to the corresponding part of the object being
    //     checked.
    //   * If the schema object is a dictionary, the checked object must have a dictionary in the
    //     same place with the same keys. If flags contains f_optional, a key in the schema does not
    //     have to be present in the object. Otherwise, all keys have to be present. Any key in the
    //     object must be present in the schema.
    //   * If the schema object is an array of length 1, the checked object may either be a single
    //     item or an array of items. The single item or each element of the checked object's
    //     array is validated against the single element of the schema's array. The rationale behind
    //     this logic is that a single element may appear wherever the schema allows a
    //     variable-length array. This makes it possible to start allowing an array in the future
    //     where a single element was previously required without breaking backward compatibility.
    //   * If the schema object is an array of length > 1, the checked object must be an array of
    //     the same length. In this case, each element of the checked object array is validated
    //     against the corresponding element of the schema array.
    //   * Otherwise, the value must be a string whose value is a description of the object's
    //     corresponding value, which may have any type.
    //
    // QPDF's JSON output conforms to certain strict compatibility rules as discussed in the manual.
    // The idea is that a JSON structure created manually in qpdf.cc doubles as both JSON help
    // information and a schema for validating the JSON that qpdf generates. Any discrepancies are a
    // bug in qpdf.
    //
    // Flags is a bitwise or of values from check_flags_e.
    enum check_flags_e {
        f_none = 0,
        f_optional = 1 << 0,
    };
    QPDF_DLL
    bool checkSchema(JSON schema, unsigned long flags, std::list<std::string>& errors);

    // Same as passing 0 (f_none) for flags
    QPDF_DLL
    bool checkSchema(JSON schema, std::list<std::string>& errors);

    // A pointer to a Reactor class can be passed to parse, which will enable the caller to react
    // to incremental events in the construction of the JSON object. This makes it possible to
    // implement SAX-like handling of very large JSON objects.
    class QPDF_DLL_CLASS Reactor
    {
      public:
        QPDF_DLL
        virtual ~Reactor() = default;

        // The start/end methods are called when parsing of a dictionary or array is started or
        // ended. The item methods are called when an item is added to a dictionary or array. When
        // adding a container to another container, the item method is called with an empty
        // container before the lower container's start method is called. See important notes in
        // "Item methods" below.

        // During parsing of a JSON string, the parser is operating on a single object at a time.
        // When a dictionary or array is started, a new context begins, and when that dictionary or
        // array is ended, the previous context is resumed. So, for example, if you have
        // `{"a": [1]}`, you will receive the following method calls:
        //
        // dictionaryStart -- current object is the top-level dictionary
        // dictionaryItem  -- called with "a" and an empty array
        // arrayStart      -- current object is the array
        // arrayItem       -- called with the "1" object
        // containerEnd    -- now current object is the dictionary again
        // containerEnd    -- current object is undefined
        //
        // If the top-level item in a JSON string is a scalar, the topLevelScalar() method will be
        // called. No argument is passed since the object is the same as what is returned by
        // parse().

        QPDF_DLL
        virtual void dictionaryStart() = 0;
        QPDF_DLL
        virtual void arrayStart() = 0;
        QPDF_DLL
        virtual void containerEnd(JSON const& value) = 0;
        QPDF_DLL
        virtual void topLevelScalar() = 0;

        // Item methods:
        //
        // The return value of the item methods indicates whether the item has been "consumed". If
        // the item method returns true, then the item will not be added to the containing JSON
        // object. This is what allows arbitrarily large JSON objects to be parsed and not have to
        // be kept in memory.
        //
        // NOTE: When a dictionary or an array is added to a container, the dictionaryItem or
        // arrayItem method is called when the child item's start delimiter is encountered, so the
        // JSON object passed in at that time will always be in its initial, empty state.
        // Additionally, the child item's start method is not called until after the parent item's
        // item method is called. This makes it possible to keep track of the current depth level by
        // incrementing level on start methods and decrementing on end methods.

        QPDF_DLL
        virtual bool dictionaryItem(std::string const& key, JSON const& value) = 0;
        QPDF_DLL
        virtual bool arrayItem(JSON const& value) = 0;
    };

    // Create a JSON object from a string.
    QPDF_DLL
    static JSON parse(std::string const&);
    // Create a JSON object from an input source. See above for information about how to use the
    // Reactor.
    QPDF_DLL
    static JSON parse(InputSource&, Reactor* reactor = nullptr);

    // parse calls setStart and setEnd to record the inclusive start and non-inclusive end offsets
    // of an object relative to its input string. For objects that were not created by parse, both
    // values are 0.
    QPDF_DLL
    void setStart(qpdf_offset_t);
    QPDF_DLL
    void setEnd(qpdf_offset_t);
    QPDF_DLL
    qpdf_offset_t getStart() const;
    QPDF_DLL
    qpdf_offset_t getEnd() const;

    // The following class does not form part of the public API and is for internal use only.

    class Writer;

  private:
    // Helper taking the same pipeline/first/depth arguments as the public close methods plus a
    // closing delimiter. NOTE(review): presumably the shared implementation behind
    // writeDictionaryClose and writeArrayClose -- confirm in the implementation.
    static void writeClose(Pipeline* p, bool first, size_t depth, char const* delimiter);

    // Tag identifying which kind of value a JSON_value holds. vt_none is the in-class default of
    // JSON_value::type_code.
    enum value_type_e {
        vt_none,
        vt_dictionary,
        vt_array,
        vt_string,
        vt_number,
        vt_bool,
        vt_null,
        vt_blob,
    };

    // Abstract base of the stored value types. It carries a type tag that is fixed at construction
    // and requires every concrete type to provide write().
    struct JSON_value
    {
        JSON_value(value_type_e type_code) :
            type_code(type_code)
        {
        }
        virtual ~JSON_value() = default;
        virtual void write(Pipeline*, size_t depth) const = 0;
        const value_type_e type_code{vt_none};
    };
    // Dictionary value: members are kept in a std::map keyed by member name.
    struct JSON_dictionary: public JSON_value
    {
        JSON_dictionary() :
            JSON_value(vt_dictionary)
        {
        }
        ~JSON_dictionary() override = default;
        void write(Pipeline*, size_t depth) const override;
        std::map<std::string, JSON> members;
    };
    // Array value: only declared here; the definition follows the JSON class in this header.
    struct JSON_array;
    // String value, stored in the `utf8` member.
    struct JSON_string: public JSON_value
    {
        JSON_string(std::string const& utf8);
        ~JSON_string() override = default;
        void write(Pipeline*, size_t depth) const override;
        std::string utf8;
    };
    // Number value. It can be constructed from an integer, a double, or a string; the only stored
    // state is the `encoded` string.
    struct JSON_number: public JSON_value
    {
        JSON_number(long long val);
        JSON_number(double val);
        JSON_number(std::string const& val);
        ~JSON_number() override = default;
        void write(Pipeline*, size_t depth) const override;
        std::string encoded;
    };
    // Boolean value.
    struct JSON_bool: public JSON_value
    {
        JSON_bool(bool val);
        ~JSON_bool() override = default;
        void write(Pipeline*, size_t depth) const override;
        bool value;
    };
    // Null value; it has no data beyond its type tag.
    struct JSON_null: public JSON_value
    {
        JSON_null() :
            JSON_value(vt_null)
        {
        }
        ~JSON_null() override = default;
        void write(Pipeline*, size_t depth) const override;
    };
    // Blob value: stores the callback passed to makeBlob (see the makeBlob comment for its
    // contract).
    struct JSON_blob: public JSON_value
    {
        JSON_blob(std::function<void(Pipeline*)> fn);
        ~JSON_blob() override = default;
        void write(Pipeline*, size_t depth) const override;
        std::function<void(Pipeline*)> fn;
    };

    // Private constructor that takes ownership of a value. NOTE(review): presumably what the
    // make* factories and parse use to build JSON objects -- confirm in the implementation.
    JSON(std::unique_ptr<JSON_value>);

    // Helper for checkSchema operating on raw value pointers. NOTE(review): presumably the
    // recursive walk described above checkSchema, with `prefix` identifying the location of the
    // object being checked for use in error messages -- confirm in the implementation.
    static bool checkSchemaInternal(
        JSON_value* this_v,
        JSON_value* sch_v,
        unsigned long flags,
        std::list<std::string>& errors,
        std::string prefix);

    // Holder for all per-object state. Only JSON (a friend) can construct it, and it cannot be
    // copied; JSON objects share it through the std::shared_ptr below.
    class Members
    {
        friend class JSON;

      public:
        QPDF_DLL
        ~Members() = default;

      private:
        Members(std::unique_ptr<JSON_value>);
        Members(Members const&) = delete;

        std::unique_ptr<JSON_value> value;
        // start and end are only populated for objects created by parse
        qpdf_offset_t start{0};
        qpdf_offset_t end{0};
    };

    std::shared_ptr<Members> m;
};

// Array member of the JSON value hierarchy: tagged vt_array, with its elements held in order in a
// std::vector. The definition sits after the JSON class body rather than next to the other value
// structs. NOTE(review): presumably so that JSON is a complete type by the time std::vector<JSON>
// is declared -- confirm before moving it.
struct JSON::JSON_array: public JSON_value
{
    // Elements in array order.
    std::vector<JSON> elements;

    JSON_array() : JSON_value(vt_array) {}
    void write(Pipeline*, size_t depth) const override;
    ~JSON_array() override = default;
};

#endif // JSON_HH