From 67edbfd954a4249d6c78bd375294e4a5bbde3846 Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 3 Mar 2025 11:18:25 +0000 Subject: [PATCH] Un-inline QUtil functions --- include/qpdf/QUtil.hh | 73 ++++++------------------------------------------------------------------- libqpdf/JSON.cc | 8 ++++++-- libqpdf/Pl_Base64.cc | 6 +++++- libqpdf/QPDF.cc | 67 +++++++++++++++++++++++++++++++++++-------------------------------- libqpdf/QPDFArgParser.cc | 5 ++++- libqpdf/QPDFJob.cc | 5 ++++- libqpdf/QPDFObjectHandle.cc | 3 ++- libqpdf/QPDFTokenizer.cc | 21 ++++++++++++--------- libqpdf/QPDF_json.cc | 14 +++++++++----- libqpdf/QPDF_linearization.cc | 7 +++++-- libqpdf/QUtil.cc | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-- libqpdf/qpdf/Util.hh | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 202 insertions(+), 123 deletions(-) create mode 100644 libqpdf/qpdf/Util.hh diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh index 16e5693..2f0a748 100644 --- a/include/qpdf/QUtil.hh +++ b/include/qpdf/QUtil.hh @@ -190,7 +190,7 @@ namespace QUtil // Returns lower-case hex-encoded version of the char including a leading "#". QPDF_DLL - inline std::string hex_encode_char(char); + std::string hex_encode_char(char); // Returns a string that is the result of decoding the input string. The input string may // consist of mixed case hexadecimal digits. Any characters that are not hexadecimal digits will @@ -202,7 +202,7 @@ namespace QUtil // Decode a single hex digit into a char in the range 0 <= char < 16. Return a char >= 16 if // digit is not a valid hex digit. QPDF_DLL - inline constexpr char hex_decode_char(char digit) noexcept; + char hex_decode_char(char digit); // Set stdin, stdout to binary mode QPDF_DLL @@ -431,16 +431,16 @@ namespace QUtil // These routines help the tokenizer recognize certain character classes without using ctype, // which we avoid because of locale considerations. QPDF_DLL - inline bool is_hex_digit(char); + bool is_hex_digit(char); QPDF_DLL - inline bool is_space(char); + bool is_space(char); QPDF_DLL - inline bool is_digit(char); + bool is_digit(char); QPDF_DLL - inline bool is_number(char const*); + bool is_number(char const*); // This method parses the numeric range syntax used by the qpdf command-line tool. May throw // std::runtime_error. A numeric range is as comma-separated list of groups. A group may be a @@ -489,65 +489,4 @@ namespace QUtil size_t get_max_memory_usage(); }; // namespace QUtil -inline bool -QUtil::is_hex_digit(char ch) -{ - return hex_decode_char(ch) < '\20'; -} - -inline bool -QUtil::is_space(char ch) -{ - return ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t' || ch == '\f' || ch == '\v'; -} - -inline bool -QUtil::is_digit(char ch) -{ - return ((ch >= '0') && (ch <= '9')); -} - -inline bool -QUtil::is_number(char const* p) -{ - // ^[\+\-]?(\.\d*|\d+(\.\d*)?)$ - if (!*p) { - return false; - } - if ((*p == '-') || (*p == '+')) { - ++p; - } - bool found_dot = false; - bool found_digit = false; - for (; *p; ++p) { - if (*p == '.') { - if (found_dot) { - // only one dot - return false; - } - found_dot = true; - } else if (QUtil::is_digit(*p)) { - found_digit = true; - } else { - return false; - } - } - return found_digit; -} - -inline std::string -QUtil::hex_encode_char(char c) -{ - static auto constexpr hexchars = "0123456789abcdef"; - return {'#', hexchars[static_cast(c) >> 4], hexchars[c & 0x0f]}; -} - -inline constexpr char -QUtil::hex_decode_char(char digit) noexcept -{ - return digit <= '9' && digit >= '0' - ? char(digit - '0') - : (digit >= 'a' ? char(digit - 'a' + 10) : (digit >= 'A' ? char(digit - 'A' + 10) : '\20')); -} - #endif // QUTIL_HH diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index 41ea8c1..80bd6c6 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -8,9 +8,13 @@ #include #include #include +#include + #include #include +using namespace qpdf; + JSON::Members::Members(std::unique_ptr value) : value(std::move(value)) { @@ -761,7 +765,7 @@ JSONParser::tokenError() QTC::TC("libtests", "JSON parse unexpected sign"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": numeric literal: unexpected sign"); - } else if (QUtil::is_space(*p) || strchr("{}[]:,", *p)) { + } else if (util::is_space(*p) || strchr("{}[]:,", *p)) { QTC::TC("libtests", "JSON parse incomplete number"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + ": numeric literal: incomplete number"); @@ -1078,7 +1082,7 @@ JSONParser::getToken() case ls_u4: using ui = unsigned int; - if (ui val = ui(QUtil::hex_decode_char(*p)); val < 16) { + if (ui val = ui(util::hex_decode_char(*p)); val < 16) { u_value = 16 * u_value + val; } else { tokenError(); diff --git a/libqpdf/Pl_Base64.cc b/libqpdf/Pl_Base64.cc index 4fa4091..afc782b 100644 --- a/libqpdf/Pl_Base64.cc +++ b/libqpdf/Pl_Base64.cc @@ -2,9 +2,13 @@ #include #include +#include + #include #include +using namespace qpdf; + static char to_c(unsigned int ch) { @@ -50,7 +54,7 @@ Pl_Base64::decode(unsigned char const* data, size_t len) { unsigned char const* p = data; while (len > 0) { - if (!QUtil::is_space(to_c(*p))) { + if (!util::is_space(to_c(*p))) { this->buf[this->pos++] = *p; if (this->pos == 4) { flush(); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index a19cd1c..7cbc656 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -22,6 +22,9 @@ #include #include #include +#include + +using namespace qpdf; // This must be a fixed value. This API returns a const reference to it, and the C API relies on its // being static as well. @@ -368,14 +371,14 @@ QPDF::numWarnings() const bool QPDF::validatePDFVersion(char const*& p, std::string& version) { - bool valid = QUtil::is_digit(*p); + bool valid = util::is_digit(*p); if (valid) { - while (QUtil::is_digit(*p)) { + while (util::is_digit(*p)) { version.append(1, *p++); } - if ((*p == '.') && QUtil::is_digit(*(p + 1))) { + if ((*p == '.') && util::is_digit(*(p + 1))) { version.append(1, *p++); - while (QUtil::is_digit(*p)) { + while (util::is_digit(*p)) { version.append(1, *p++); } } else { @@ -709,7 +712,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) while (!done) { char ch; if (1 == m->file->read(&ch, 1)) { - if (QUtil::is_space(ch)) { + if (util::is_space(ch)) { skipped_space = true; } else { m->file->unreadCh(ch); @@ -724,7 +727,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) m->file->read(buf, sizeof(buf) - 1); // The PDF spec says xref must be followed by a line terminator, but files exist in the wild // where it is terminated by arbitrary whitespace. - if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) { + if ((strncmp(buf, "xref", 4) == 0) && util::is_space(buf[4])) { if (skipped_space) { QTC::TC("qpdf", "QPDF xref skipped space"); warn(damagedPDF("", 0, "extraneous whitespace seen before xref")); @@ -737,8 +740,8 @@ QPDF::read_xref(qpdf_offset_t xref_offset) : (buf[4] == ' ') ? 2 : 9999)); int skip = 4; - // buf is null-terminated, and QUtil::is_space('\0') is false, so this won't overrun. - while (QUtil::is_space(buf[skip])) { + // buf is null-terminated, and util::is_space('\0') is false, so this won't overrun. + while (util::is_space(buf[skip])) { ++skip; } xref_offset = read_xrefTable(xref_offset + skip); @@ -795,37 +798,37 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) char const* start = line.c_str(); // Skip zero or more spaces - while (QUtil::is_space(*p)) { + while (util::is_space(*p)) { ++p; } // Require digit - if (!QUtil::is_digit(*p)) { + if (!util::is_digit(*p)) { return false; } // Gather digits std::string obj_str; - while (QUtil::is_digit(*p)) { + while (util::is_digit(*p)) { obj_str.append(1, *p++); } // Require space - if (!QUtil::is_space(*p)) { + if (!util::is_space(*p)) { return false; } // Skip spaces - while (QUtil::is_space(*p)) { + while (util::is_space(*p)) { ++p; } // Require digit - if (!QUtil::is_digit(*p)) { + if (!util::is_digit(*p)) { return false; } // Gather digits std::string num_str; - while (QUtil::is_digit(*p)) { + while (util::is_digit(*p)) { num_str.append(1, *p++); } // Skip any space including line terminators - while (QUtil::is_space(*p)) { + while (util::is_space(*p)) { ++p; } bytes = toI(p - start); @@ -847,51 +850,51 @@ QPDF::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type) // Skip zero or more spaces. There aren't supposed to be any. bool invalid = false; - while (QUtil::is_space(*p)) { + while (util::is_space(*p)) { ++p; QTC::TC("qpdf", "QPDF ignore first space in xref entry"); invalid = true; } // Require digit - if (!QUtil::is_digit(*p)) { + if (!util::is_digit(*p)) { return false; } // Gather digits std::string f1_str; - while (QUtil::is_digit(*p)) { + while (util::is_digit(*p)) { f1_str.append(1, *p++); } // Require space - if (!QUtil::is_space(*p)) { + if (!util::is_space(*p)) { return false; } - if (QUtil::is_space(*(p + 1))) { + if (util::is_space(*(p + 1))) { QTC::TC("qpdf", "QPDF ignore first extra space in xref entry"); invalid = true; } // Skip spaces - while (QUtil::is_space(*p)) { + while (util::is_space(*p)) { ++p; } // Require digit - if (!QUtil::is_digit(*p)) { + if (!util::is_digit(*p)) { return false; } // Gather digits std::string f2_str; - while (QUtil::is_digit(*p)) { + while (util::is_digit(*p)) { f2_str.append(1, *p++); } // Require space - if (!QUtil::is_space(*p)) { + if (!util::is_space(*p)) { return false; } - if (QUtil::is_space(*(p + 1))) { + if (util::is_space(*(p + 1))) { QTC::TC("qpdf", "QPDF ignore second extra space in xref entry"); invalid = true; } // Skip spaces - while (QUtil::is_space(*p)) { + while (util::is_space(*p)) { ++p; } if ((*p == 'f') || (*p == 'n')) { @@ -938,12 +941,12 @@ QPDF::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type) ++f1_len; ++p; } - while (QUtil::is_digit(*p) && f1_len++ < 10) { + while (util::is_digit(*p) && f1_len++ < 10) { f1 *= 10; f1 += *p++ - '0'; } // Require space - if (!QUtil::is_space(*p++)) { + if (!util::is_space(*p++)) { // Entry doesn't start with space or digit. // C++20: [[unlikely]] return false; @@ -953,11 +956,11 @@ QPDF::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type) ++f2_len; ++p; } - while (QUtil::is_digit(*p) && f2_len++ < 5) { + while (util::is_digit(*p) && f2_len++ < 5) { f2 *= 10; f2 += static_cast(*p++ - '0'); } - if (QUtil::is_space(*p++) && (*p == 'f' || *p == 'n')) { + if (util::is_space(*p++) && (*p == 'f' || *p == 'n')) { // C++20: [[likely]] type = *p; // No test for valid line[19]. @@ -1602,7 +1605,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset } return; } - if (!QUtil::is_space(ch)) { + if (!util::is_space(ch)) { QTC::TC("qpdf", "QPDF stream without newline"); m->file->unreadCh(ch); warn(damagedPDF( diff --git a/libqpdf/QPDFArgParser.cc b/libqpdf/QPDFArgParser.cc index fc8f2ef..c16c18b 100644 --- a/libqpdf/QPDFArgParser.cc +++ b/libqpdf/QPDFArgParser.cc @@ -5,10 +5,13 @@ #include #include #include +#include + #include #include #include +using namespace qpdf; using namespace std::literals; QPDFArgParser::Members::Members(int argc, char const* const argv[], char const* progname_env) : @@ -285,7 +288,7 @@ QPDFArgParser::handleBashArguments() bool append = false; switch (state) { case st_top: - if (QUtil::is_space(ch)) { + if (util::is_space(ch)) { if (!arg.empty()) { m->bash_argv.push_back(QUtil::make_shared_cstr(arg)); arg.clear(); diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 0aa5609..6b1d6cb 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -29,9 +29,12 @@ #include #include #include +#include #include // JOB_SCHEMA_DATA +using namespace qpdf; + namespace { class ImageOptimizer: public QPDFObjectHandle::StreamDataProvider @@ -388,7 +391,7 @@ QPDFJob::parseRotationParameter(std::string const& parameter) if ((first == '+') || (first == '-')) { relative = ((first == '+') ? 1 : -1); angle_str = angle_str.substr(1); - } else if (!QUtil::is_digit(angle_str.at(0))) { + } else if (!util::is_digit(angle_str.at(0))) { angle_str = ""; } } diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index b7bfba0..13f04fd 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -279,7 +280,7 @@ Name::normalize(std::string const& name) } else if ( ch < 33 || ch == '#' || ch == '/' || ch == '(' || ch == ')' || ch == '{' || ch == '}' || ch == '<' || ch == '>' || ch == '[' || ch == ']' || ch == '%' || ch > 126) { - result += QUtil::hex_encode_char(ch); + result += util::hex_encode_char(ch); } else { result += ch; } diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 7f7c6d9..00796a2 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -8,11 +8,14 @@ #include #include #include +#include #include #include #include +using namespace qpdf; + static inline bool is_delimiter(char ch) { @@ -123,7 +126,7 @@ QPDFTokenizer::includeIgnorable() bool QPDFTokenizer::isSpace(char ch) { - return ((ch == '\0') || QUtil::is_space(ch)); + return (ch == '\0' || util::is_space(ch)); } bool @@ -440,7 +443,7 @@ QPDFTokenizer::inNameHex1(char ch) { this->hex_char = ch; - if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + if (char hval = util::hex_decode_char(ch); hval < '\20') { this->char_code = int(hval) << 4; this->state = st_name_hex2; } else { @@ -456,7 +459,7 @@ QPDFTokenizer::inNameHex1(char ch) void QPDFTokenizer::inNameHex2(char ch) { - if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + if (char hval = util::hex_decode_char(ch); hval < '\20') { this->char_code |= int(hval); } else { QTC::TC("qpdf", "QPDFTokenizer bad name 2"); @@ -483,7 +486,7 @@ QPDFTokenizer::inNameHex2(char ch) void QPDFTokenizer::inSign(char ch) { - if (QUtil::is_digit(ch)) { + if (util::is_digit(ch)) { this->state = st_number; } else if (ch == '.') { this->state = st_decimal; @@ -496,7 +499,7 @@ QPDFTokenizer::inSign(char ch) void QPDFTokenizer::inDecimal(char ch) { - if (QUtil::is_digit(ch)) { + if (util::is_digit(ch)) { this->state = st_real; } else { this->state = st_literal; @@ -507,7 +510,7 @@ QPDFTokenizer::inDecimal(char ch) void QPDFTokenizer::inNumber(char ch) { - if (QUtil::is_digit(ch)) { + if (util::is_digit(ch)) { } else if (ch == '.') { this->state = st_real; } else if (isDelimiter(ch)) { @@ -523,7 +526,7 @@ QPDFTokenizer::inNumber(char ch) void QPDFTokenizer::inReal(char ch) { - if (QUtil::is_digit(ch)) { + if (util::is_digit(ch)) { } else if (isDelimiter(ch)) { this->type = tt_real; this->state = st_token_ready; @@ -645,7 +648,7 @@ QPDFTokenizer::inLiteral(char ch) void QPDFTokenizer::inHexstring(char ch) { - if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + if (char hval = util::hex_decode_char(ch); hval < '\20') { this->char_code = int(hval) << 4; this->state = st_in_hexstring_2nd; @@ -667,7 +670,7 @@ QPDFTokenizer::inHexstring(char ch) void QPDFTokenizer::inHexstring2nd(char ch) { - if (char hval = QUtil::hex_decode_char(ch); hval < '\20') { + if (char hval = util::hex_decode_char(ch); hval < '\20') { this->val += char(this->char_code) | hval; this->state = st_in_hexstring; diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc index 288f265..81c5506 100644 --- a/libqpdf/QPDF_json.cc +++ b/libqpdf/QPDF_json.cc @@ -9,9 +9,13 @@ #include #include #include +#include + #include #include +using namespace qpdf; + // This chart shows an example of the state transitions that would occur in parsing a minimal file. // | @@ -67,10 +71,10 @@ is_indirect_object(std::string const& v, int& obj, int& gen) char const* p = v.c_str(); std::string o_str; std::string g_str; - if (!QUtil::is_digit(*p)) { + if (!util::is_digit(*p)) { return false; } - while (QUtil::is_digit(*p)) { + while (util::is_digit(*p)) { o_str.append(1, *p++); } if (*p != ' ') { @@ -79,10 +83,10 @@ is_indirect_object(std::string const& v, int& obj, int& gen) while (*p == ' ') { ++p; } - if (!QUtil::is_digit(*p)) { + if (!util::is_digit(*p)) { return false; } - while (QUtil::is_digit(*p)) { + while (util::is_digit(*p)) { g_str.append(1, *p++); } if (*p != ' ') { @@ -128,7 +132,7 @@ is_binary_string(std::string const& v, std::string& str) str = v.substr(2); int count = 0; for (char c: str) { - if (!QUtil::is_hex_digit(c)) { + if (!util::is_hex_digit(c)) { return false; } ++count; diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index 703617a..b183c07 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -12,11 +12,14 @@ #include #include #include +#include #include #include #include +using namespace qpdf; + template static void load_vector_int( @@ -105,7 +108,7 @@ QPDF::isLinearized() char* p = buf; while (lindict_obj == -1) { // Find a digit or end of buffer - while (((p - buf) < tbuf_size) && (!QUtil::is_digit(*p))) { + while (((p - buf) < tbuf_size) && (!util::is_digit(*p))) { ++p; } if (p - buf == tbuf_size) { @@ -114,7 +117,7 @@ QPDF::isLinearized() // Seek to the digit. Then skip over digits for a potential // next iteration. m->file->seek(p - buf, SEEK_SET); - while (((p - buf) < tbuf_size) && QUtil::is_digit(*p)) { + while (((p - buf) < tbuf_size) && util::is_digit(*p)) { ++p; } diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc index e075399..05b53ba 100644 --- a/libqpdf/QUtil.cc +++ b/libqpdf/QUtil.cc @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -37,6 +38,8 @@ # include #endif +using namespace qpdf; + // First element is 24 static unsigned short pdf_doc_low_to_unicode[] = { 0x02d8, // 0x18 BREVE @@ -396,7 +399,7 @@ unsigned long long QUtil::string_to_ull(char const* str) { char const* p = str; - while (*p && is_space(*p)) { + while (*p && util::is_space(*p)) { ++p; } if (*p == '-') { @@ -739,7 +742,7 @@ QUtil::hex_decode(std::string const& input) bool first = true; char decoded; for (auto ch: input) { - ch = hex_decode_char(ch); + ch = util::hex_decode_char(ch); if (ch < '\20') { if (first) { decoded = static_cast(ch << 4); @@ -2002,3 +2005,63 @@ QUtil::get_max_memory_usage() return 0; #endif } + +char +QUtil::hex_decode_char(char digit) +{ + return util::hex_decode_char(digit); +} + +std::string +QUtil::hex_encode_char(char c) +{ + return util::hex_encode_char(c); +} + +bool +QUtil::is_number(char const* p) +{ + // No longer used by qpdf. + + // ^[\+\-]?(\.\d*|\d+(\.\d*)?)$ + if (!*p) { + return false; + } + if ((*p == '-') || (*p == '+')) { + ++p; + } + bool found_dot = false; + bool found_digit = false; + for (; *p; ++p) { + if (*p == '.') { + if (found_dot) { + // only one dot + return false; + } + found_dot = true; + } else if (util::is_digit(*p)) { + found_digit = true; + } else { + return false; + } + } + return found_digit; +} + +bool +QUtil::is_space(char c) +{ + return util::is_space(c); +} + +bool +QUtil::is_digit(char c) +{ + return util::is_digit(c); +} + +bool +QUtil::is_hex_digit(char c) +{ + return util::is_hex_digit(c); +} diff --git a/libqpdf/qpdf/Util.hh b/libqpdf/qpdf/Util.hh new file mode 100644 index 0000000..9f6b389 --- /dev/null +++ b/libqpdf/qpdf/Util.hh @@ -0,0 +1,49 @@ +#ifndef UTIL_HH +#define UTIL_HH + +#include + +namespace qpdf::util +{ + // This is a collection of useful utility functions for qpdf internal use. They include inline + // functions, some of which are exposed as regular functions in QUtil. Implementations are in + // QUtil.cc. + + inline constexpr char + hex_decode_char(char digit) + { + return digit <= '9' && digit >= '0' + ? char(digit - '0') + : (digit >= 'a' ? char(digit - 'a' + 10) + : (digit >= 'A' ? char(digit - 'A' + 10) : '\20')); + } + + inline constexpr bool + is_hex_digit(char ch) + { + return hex_decode_char(ch) < '\20'; + } + + inline constexpr bool + is_space(char ch) + { + return ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t' || ch == '\f' || ch == '\v'; + } + + inline bool + is_digit(char ch) + { + return (ch >= '0' && ch <= '9'); + } + + // Returns lower-case hex-encoded version of the char including a leading "#". + inline std::string + hex_encode_char(char c) + { + static auto constexpr hexchars = "0123456789abcdef"; + return {'#', hexchars[static_cast(c) >> 4], hexchars[c & 0x0f]}; + } + +} // namespace qpdf::util + +#endif // UTIL_HH -- libgit2 0.21.4