Commit 011b1d7e3af803991ee7e4143b01965296af3cce
1 parent
4ee6ff0a
Use std::string_view in QdfFixer::processLines
Change type of local var lines to string_view. Also, instead of constructing a list of lines, read the entire input into a single string and break it up into lines on the fly.
Showing 1 changed file with 57 additions and 30 deletions
qpdf/fix-qdf.cc
| @@ -6,6 +6,7 @@ | @@ -6,6 +6,7 @@ | ||
| 6 | #include <cstring> | 6 | #include <cstring> |
| 7 | #include <iostream> | 7 | #include <iostream> |
| 8 | #include <regex> | 8 | #include <regex> |
| 9 | +#include <string_view> | ||
| 9 | 10 | ||
| 10 | static char const* whoami = 0; | 11 | static char const* whoami = 0; |
| 11 | 12 | ||
| @@ -20,7 +21,7 @@ class QdfFixer | @@ -20,7 +21,7 @@ class QdfFixer | ||
| 20 | { | 21 | { |
| 21 | public: | 22 | public: |
| 22 | QdfFixer(std::string const& filename); | 23 | QdfFixer(std::string const& filename); |
| 23 | - void processLines(std::list<std::string>& lines); | 24 | + void processLines(std::string const& input); |
| 24 | 25 | ||
| 25 | private: | 26 | private: |
| 26 | void fatal(std::string const&); | 27 | void fatal(std::string const&); |
| @@ -58,9 +59,9 @@ class QdfFixer | @@ -58,9 +59,9 @@ class QdfFixer | ||
| 58 | size_t xref_f1_nbytes; | 59 | size_t xref_f1_nbytes; |
| 59 | size_t xref_f2_nbytes; | 60 | size_t xref_f2_nbytes; |
| 60 | size_t xref_size; | 61 | size_t xref_size; |
| 61 | - std::vector<std::string> ostream; | 62 | + std::vector<std::string_view> ostream; |
| 62 | std::vector<qpdf_offset_t> ostream_offsets; | 63 | std::vector<qpdf_offset_t> ostream_offsets; |
| 63 | - std::vector<std::string> ostream_discarded; | 64 | + std::vector<std::string_view> ostream_discarded; |
| 64 | size_t ostream_idx; | 65 | size_t ostream_idx; |
| 65 | int ostream_id; | 66 | int ostream_id; |
| 66 | std::string ostream_extends; | 67 | std::string ostream_extends; |
| @@ -92,34 +93,60 @@ QdfFixer::fatal(std::string const& msg) | @@ -92,34 +93,60 @@ QdfFixer::fatal(std::string const& msg) | ||
| 92 | } | 93 | } |
| 93 | 94 | ||
| 94 | void | 95 | void |
| 95 | -QdfFixer::processLines(std::list<std::string>& lines) | 96 | +QdfFixer::processLines(std::string const& input) |
| 96 | { | 97 | { |
| 97 | - static std::regex re_n_0_obj("^(\\d+) 0 obj\n$"); | ||
| 98 | - static std::regex re_xref("^xref\n$"); | ||
| 99 | - static std::regex re_stream("^stream\n$"); | ||
| 100 | - static std::regex re_endobj("^endobj\n$"); | ||
| 101 | - static std::regex re_type_objstm("/Type /ObjStm"); | ||
| 102 | - static std::regex re_type_xref("/Type /XRef"); | ||
| 103 | - static std::regex re_extends("/Extends (\\d+ 0 R)"); | ||
| 104 | - static std::regex re_ostream_obj("^%% Object stream: object (\\d+)"); | ||
| 105 | - static std::regex re_endstream("^endstream\n$"); | ||
| 106 | - static std::regex re_length_or_w("/(Length|W) "); | ||
| 107 | - static std::regex re_size("/Size "); | ||
| 108 | - static std::regex re_ignore_newline("^%QDF: ignore_newline\n$"); | ||
| 109 | - static std::regex re_num("^\\d+\n$"); | ||
| 110 | - static std::regex re_trailer("^trailer <<"); | ||
| 111 | - static std::regex re_size_n("^ /Size \\d+\n$"); | ||
| 112 | - static std::regex re_dict_end("^>>\n$"); | 98 | + static const std::regex re_n_0_obj("^(\\d+) 0 obj\n$"); |
| 99 | + static const std::regex re_xref("^xref\n$"); | ||
| 100 | + static const std::regex re_stream("^stream\n$"); | ||
| 101 | + static const std::regex re_endobj("^endobj\n$"); | ||
| 102 | + static const std::regex re_type_objstm("/Type /ObjStm"); | ||
| 103 | + static const std::regex re_type_xref("/Type /XRef"); | ||
| 104 | + static const std::regex re_extends("/Extends (\\d+ 0 R)"); | ||
| 105 | + static const std::regex re_ostream_obj("^%% Object stream: object (\\d+)"); | ||
| 106 | + static const std::regex re_endstream("^endstream\n$"); | ||
| 107 | + static const std::regex re_length_or_w("/(Length|W) "); | ||
| 108 | + static const std::regex re_size("/Size "); | ||
| 109 | + static const std::regex re_ignore_newline("^%QDF: ignore_newline\n$"); | ||
| 110 | + static const std::regex re_num("^\\d+\n$"); | ||
| 111 | + static const std::regex re_trailer("^trailer <<"); | ||
| 112 | + static const std::regex re_size_n("^ /Size \\d+\n$"); | ||
| 113 | + static const std::regex re_dict_end("^>>\n$"); | ||
| 114 | + | ||
| 115 | + auto sv_diff = [](size_t i) { | ||
| 116 | + return static_cast<std::string_view::difference_type>(i); | ||
| 117 | + }; | ||
| 113 | 118 | ||
| 114 | lineno = 0; | 119 | lineno = 0; |
| 115 | - for (auto const& line: lines) { | 120 | + bool more = true; |
| 121 | + auto len_line = sv_diff(0); | ||
| 122 | + | ||
| 123 | + std::string_view line; | ||
| 124 | + std::string_view input_view{input.data(), input.size()}; | ||
| 125 | + size_t offs = 0; | ||
| 126 | + | ||
| 127 | + auto b_line = input.cbegin(); | ||
| 128 | + std::smatch m; | ||
| 129 | + auto const matches = [&m, &b_line, &len_line](std::regex const& r) { | ||
| 130 | + return std::regex_search(b_line, b_line + len_line, m, r); | ||
| 131 | + }; | ||
| 132 | + | ||
| 133 | + while (more) { | ||
| 116 | ++lineno; | 134 | ++lineno; |
| 117 | last_offset = offset; | 135 | last_offset = offset; |
| 118 | - offset += QIntC::to_offset(line.length()); | ||
| 119 | - std::smatch m; | ||
| 120 | - auto matches = [&m, &line](std::regex& r) { | ||
| 121 | - return std::regex_search(line, m, r); | ||
| 122 | - }; | 136 | + b_line += len_line; |
| 137 | + | ||
| 138 | + offs = input_view.find('\n'); | ||
| 139 | + if (offs == std::string::npos) { | ||
| 140 | + more = false; | ||
| 141 | + line = input_view; | ||
| 142 | + } else { | ||
| 143 | + offs++; | ||
| 144 | + line = input_view.substr(0, offs); | ||
| 145 | + input_view.remove_prefix(offs); | ||
| 146 | + } | ||
| 147 | + len_line = sv_diff(line.size()); | ||
| 148 | + offset += len_line; | ||
| 149 | + | ||
| 123 | if (state == st_top) { | 150 | if (state == st_top) { |
| 124 | if (matches(re_n_0_obj)) { | 151 | if (matches(re_n_0_obj)) { |
| 125 | checkObjId(m[1].str()); | 152 | checkObjId(m[1].str()); |
| @@ -392,17 +419,17 @@ realmain(int argc, char* argv[]) | @@ -392,17 +419,17 @@ realmain(int argc, char* argv[]) | ||
| 392 | } else if (argc == 2) { | 419 | } else if (argc == 2) { |
| 393 | filename = argv[1]; | 420 | filename = argv[1]; |
| 394 | } | 421 | } |
| 395 | - std::list<std::string> lines; | 422 | + std::string input; |
| 396 | if (filename == 0) { | 423 | if (filename == 0) { |
| 397 | filename = "standard input"; | 424 | filename = "standard input"; |
| 398 | QUtil::binary_stdin(); | 425 | QUtil::binary_stdin(); |
| 399 | - lines = QUtil::read_lines_from_file(stdin, true); | 426 | + input = QUtil::read_file_into_string(stdin); |
| 400 | } else { | 427 | } else { |
| 401 | - lines = QUtil::read_lines_from_file(filename, true); | 428 | + input = QUtil::read_file_into_string(filename); |
| 402 | } | 429 | } |
| 403 | QUtil::binary_stdout(); | 430 | QUtil::binary_stdout(); |
| 404 | QdfFixer qf(filename); | 431 | QdfFixer qf(filename); |
| 405 | - qf.processLines(lines); | 432 | + qf.processLines(input); |
| 406 | return 0; | 433 | return 0; |
| 407 | } | 434 | } |
| 408 | 435 |