QUtil.hh 12.9 KB
// Copyright (c) 2005-2019 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.

#ifndef QUTIL_HH
#define QUTIL_HH

#include <qpdf/DLL.h>
#include <qpdf/Types.h>
#include <qpdf/PointerHolder.hh>
#include <string>
#include <list>
#include <vector>
#include <stdexcept>
#include <stdio.h>
#include <time.h>

class RandomDataProvider;

namespace QUtil
{
    // This is a collection of useful utility functions that don't
    // really go anywhere else.
    QPDF_DLL
    std::string int_to_string(long long, int length = 0);
    QPDF_DLL
    std::string uint_to_string(unsigned long long, int length = 0);
    QPDF_DLL
    std::string int_to_string_base(long long, int base, int length = 0);
    QPDF_DLL
    std::string uint_to_string_base(unsigned long long, int base, int length = 0);
    QPDF_DLL
    std::string double_to_string(double, int decimal_places = 0);

    // These string to number methods throw std::runtime_error on
    // underflow/overflow.
    QPDF_DLL
    long long string_to_ll(char const* str);
    QPDF_DLL
    int string_to_int(char const* str);
    QPDF_DLL
    unsigned long long string_to_ull(char const* str);
    QPDF_DLL
    unsigned int string_to_uint(char const* str);

    // Pipeline's write method wants unsigned char*, but we often have
    // some other type of string.  These methods do combinations of
    // const_cast and reinterpret_cast to give us an unsigned char*.
    // They should only be used when it is known that it is safe.
    // None of the pipelines in qpdf modify the data passed to them,
    // so within qpdf, it should always be safe.
    QPDF_DLL
    unsigned char* unsigned_char_pointer(std::string const& str);
    QPDF_DLL
    unsigned char* unsigned_char_pointer(char const* str);

    // Throw QPDFSystemError, which is derived from
    // std::runtime_error, with a string formed by appending to
    // "description: " the standard string corresponding to the
    // current value of errno. You can retrieve the value of errno by
    // calling getErrno() on the QPDFSystemError. Prior to qpdf 8.2.0,
    // this method threw system::runtime_error directly, but since
    // QPDFSystemError is derived from system::runtime_error, old code
    // that specifically catches std::runtime_error will still work.
    QPDF_DLL
    void throw_system_error(std::string const& description);

    // The status argument is assumed to be the return value of a
    // standard library call that sets errno when it fails.  If status
    // is -1, convert the current value of errno to a
    // std::runtime_error that includes the standard error string.
    // Otherwise, return status.
    QPDF_DLL
    int os_wrapper(std::string const& description, int status);

    // If the open fails, throws std::runtime_error.  Otherwise, the
    // FILE* is returned.
    QPDF_DLL
    FILE* safe_fopen(char const* filename, char const* mode);

    // The FILE* argument is assumed to be the return of fopen.  If
    // null, throw std::runtime_error.  Otherwise, return the FILE*
    // argument.
    QPDF_DLL
    FILE* fopen_wrapper(std::string const&, FILE*);

    // Wrap around off_t versions of fseek and ftell if available
    QPDF_DLL
    int seek(FILE* stream, qpdf_offset_t offset, int whence);
    QPDF_DLL
    qpdf_offset_t tell(FILE* stream);

    QPDF_DLL
    bool same_file(char const* name1, char const* name2);

    QPDF_DLL
    char* copy_string(std::string const&);

    // Returns lower-case hex-encoded version of the string, treating
    // each character in the input string as unsigned.  The output
    // string will be twice as long as the input string.
    QPDF_DLL
    std::string hex_encode(std::string const&);

    // Returns a string that is the result of decoding the input
    // string. The input string may consist of mixed case hexadecimal
    // digits. Any characters that are not hexadecimal digits will be
    // silently ignored. If there are an odd number of hexadecimal
    // digits, a trailing 0 will be assumed.
    QPDF_DLL
    std::string hex_decode(std::string const&);

    // Set stdin, stdout to binary mode
    QPDF_DLL
    void binary_stdout();
    QPDF_DLL
    void binary_stdin();
    // Set stdout to line buffered
    QPDF_DLL
    void setLineBuf(FILE*);


    // May modify argv0
    QPDF_DLL
    char* getWhoami(char* argv0);

    // Get the value of an environment variable in a portable fashion.
    // Returns true iff the variable is defined.  If `value' is
    // non-null, initializes it with the value of the variable.
    QPDF_DLL
    bool get_env(std::string const& var, std::string* value = 0);

    QPDF_DLL
    time_t get_current_time();

    // Return a string containing the byte representation of the UTF-8
    // encoding for the unicode value passed in.
    QPDF_DLL
    std::string toUTF8(unsigned long uval);

    // Return a string containing the byte representation of the
    // UTF-16 big-endian encoding for the unicode value passed in.
    // Unrepresentable code points are converted to U+FFFD.
    QPDF_DLL
    std::string toUTF16(unsigned long uval);

    // Test whether this is a UTF-16 big-endian string. This is
    // indicated by first two bytes being 0xFE 0xFF.
    QPDF_DLL
    bool is_utf16(std::string const&);

    // Convert a UTF-8 encoded string to UTF-16 big-endian.
    // Unrepresentable code points are converted to U+FFFD.
    QPDF_DLL
    std::string utf8_to_utf16(std::string const& utf8);

    // Convert a UTF-8 encoded string to the specified single-byte
    // encoding system by replacing all unsupported characters with
    // the given unknown_char.
    QPDF_DLL
    std::string utf8_to_ascii(
        std::string const& utf8, char unknown_char = '?');
    QPDF_DLL
    std::string utf8_to_win_ansi(
        std::string const& utf8, char unknown_char = '?');
    QPDF_DLL
    std::string utf8_to_mac_roman(
        std::string const& utf8, char unknown_char = '?');
    QPDF_DLL
    std::string utf8_to_pdf_doc(
        std::string const& utf8, char unknown_char = '?');

    // These versions return true if the conversion was successful and
    // false if any unrepresentable characters were found and had to
    // be substituted with the unknown character.
    QPDF_DLL
    bool utf8_to_ascii(
        std::string const& utf8, std::string& ascii, char unknown_char = '?');
    QPDF_DLL
    bool utf8_to_win_ansi(
        std::string const& utf8, std::string& win, char unknown_char = '?');
    QPDF_DLL
    bool utf8_to_mac_roman(
        std::string const& utf8, std::string& mac, char unknown_char = '?');
    QPDF_DLL
    bool utf8_to_pdf_doc(
        std::string const& utf8, std::string& pdfdoc, char unknown_char = '?');

    // Convert a UTF-16 big-endian encoded string to UTF-8.
    // Unrepresentable code points are converted to U+FFFD.
    QPDF_DLL
    std::string utf16_to_utf8(std::string const& utf16);

    // Convert from the specified single-byte encoding system to
    // UTF-8. There is no ascii_to_utf8 because all ASCII strings are
    // already valid UTF-8.
    QPDF_DLL
    std::string win_ansi_to_utf8(std::string const& win);
    QPDF_DLL
    std::string mac_roman_to_utf8(std::string const& mac);
    QPDF_DLL
    std::string pdf_doc_to_utf8(std::string const& pdfdoc);

    // Analyze a string for encoding. We can't tell the difference
    // between any single-byte encodings, and we can't tell for sure
    // whether a string that happens to be valid UTF-8 isn't a
    // different encoding, but we can at least tell a few things to
    // help us guess. If there are no characters with the high bit
    // set, has_8bit_chars is false, and the other values are also
    // false, even though ASCII strings are valid UTF-8. is_valid_utf8
    // means that the string is non-trivially valid UTF-8.
    QPDF_DLL
    void analyze_encoding(std::string const& str,
                          bool& has_8bit_chars,
                          bool& is_valid_utf8,
                          bool& is_utf16);

    // Try to compensate for previously incorrectly encoded strings.
    // We want to compensate for the following errors:
    //
    // * The string was supposed to be UTF-8 but was one of the
    //   single-byte encodings
    // * The string was supposed to be PDF Doc but was either UTF-8 or
    //   one of the other single-byte encodings
    //
    // The returned vector always contains the original string first,
    // and then it contains what the correct string would be in the
    // event that the original string was the result of any of the
    // above errors.
    //
    // This method is useful for attempting to recover a password that
    // may have been previously incorrectly encoded. For example, the
    // password was supposed to be UTF-8 but the previous application
    // used a password encoded in WinAnsi, or if the previous password
    // was supposed to be PDFDoc but was actually given as UTF-8 or
    // WinAnsi, this method would find the correct password.
    QPDF_DLL
    std::vector<std::string> possible_repaired_encodings(std::string);

    // If secure random number generation is supported on your
    // platform and qpdf was not compiled with insecure random number
    // generation, this returns a cryptographically secure random
    // number.  Otherwise it falls back to random from stdlib and
    // calls srandom automatically the first time it is called.
    QPDF_DLL
    long random();

    // Wrapper around srandom from stdlib.  Seeds the standard library
    // weak random number generator, which is not used if secure
    // random number generation is being used.  You never need to call
    // this method as it is called automatically if needed.
    QPDF_DLL
    void srandom(unsigned int seed);

    // Initialize a buffer with random bytes.  By default, qpdf tries
    // to use a secure random number source.  It can be configured at
    // compile time to use an insecure random number source (from
    // stdlib).  You can also call setRandomDataProvider with a
    // RandomDataProvider, in which case this method will get its
    // random bytes from that.

    QPDF_DLL
    void initializeWithRandomBytes(unsigned char* data, size_t len);

    // Supply a random data provider.  If not supplied, depending on
    // compile time options, qpdf will either use the operating
    // system's secure random number source or an insecure random
    // source from stdlib.  The caller is responsible for managing the
    // memory for the RandomDataProvider.  This method modifies a
    // static variable.  If you are providing your own random data
    // provider, you should call this at the beginning of your program
    // before creating any QPDF objects.  Passing a null to this
    // method will reset the library back to whichever of the built-in
    // random data handlers is appropriate based on how qpdf was
    // compiled.
    QPDF_DLL
    void setRandomDataProvider(RandomDataProvider*);

    // This returns the random data provider that would be used the
    // next time qpdf needs random data.  It will never return null.
    // If no random data provider has been provided and the library
    // was not compiled with any random data provider available, an
    // exception will be thrown.
    QPDF_DLL
    RandomDataProvider* getRandomDataProvider();

    QPDF_DLL
    std::list<std::string> read_lines_from_file(char const* filename);
    QPDF_DLL
    std::list<std::string> read_lines_from_file(std::istream&);
    QPDF_DLL
    void read_file_into_memory(
        char const* filename, PointerHolder<char>& file_buf, size_t& size);

    // This used to be called strcasecmp, but that is a macro on some
    // platforms, so we have to give it a name that is not likely to
    // be a macro anywhere.
    QPDF_DLL
    int str_compare_nocase(char const *, char const *);

    // These routines help the tokenizer recognize certain character
    // classes without using ctype, which we avoid because of locale
    // considerations.
    QPDF_DLL
    bool is_hex_digit(char);

    QPDF_DLL
    bool is_space(char);

    QPDF_DLL
    bool is_digit(char);

    QPDF_DLL
    bool is_number(char const*);

    // This method parses the numeric range syntax used by the qpdf
    // command-line tool. May throw std::runtime_error.
    QPDF_DLL
    std::vector<int> parse_numrange(char const* range, int max);
};

#endif // QUTIL_HH