Commit 011b1d7e3af803991ee7e4143b01965296af3cce

Authored by m-holger
1 parent 4ee6ff0a

Use std::string_view in QdfFixer::processLines

Change type of local var lines to string_view. Also, instead of
constructing a list of lines, read the entire input into a single string
and break it up into lines on the fly.
Showing 1 changed file with 57 additions and 30 deletions
qpdf/fix-qdf.cc
@@ -6,6 +6,7 @@ @@ -6,6 +6,7 @@
6 #include <cstring> 6 #include <cstring>
7 #include <iostream> 7 #include <iostream>
8 #include <regex> 8 #include <regex>
  9 +#include <string_view>
9 10
10 static char const* whoami = 0; 11 static char const* whoami = 0;
11 12
@@ -20,7 +21,7 @@ class QdfFixer @@ -20,7 +21,7 @@ class QdfFixer
20 { 21 {
21 public: 22 public:
22 QdfFixer(std::string const& filename); 23 QdfFixer(std::string const& filename);
23 - void processLines(std::list<std::string>& lines); 24 + void processLines(std::string const& input);
24 25
25 private: 26 private:
26 void fatal(std::string const&); 27 void fatal(std::string const&);
@@ -58,9 +59,9 @@ class QdfFixer @@ -58,9 +59,9 @@ class QdfFixer
58 size_t xref_f1_nbytes; 59 size_t xref_f1_nbytes;
59 size_t xref_f2_nbytes; 60 size_t xref_f2_nbytes;
60 size_t xref_size; 61 size_t xref_size;
61 - std::vector<std::string> ostream; 62 + std::vector<std::string_view> ostream;
62 std::vector<qpdf_offset_t> ostream_offsets; 63 std::vector<qpdf_offset_t> ostream_offsets;
63 - std::vector<std::string> ostream_discarded; 64 + std::vector<std::string_view> ostream_discarded;
64 size_t ostream_idx; 65 size_t ostream_idx;
65 int ostream_id; 66 int ostream_id;
66 std::string ostream_extends; 67 std::string ostream_extends;
@@ -92,34 +93,60 @@ QdfFixer::fatal(std::string const& msg) @@ -92,34 +93,60 @@ QdfFixer::fatal(std::string const& msg)
92 } 93 }
93 94
94 void 95 void
95 -QdfFixer::processLines(std::list<std::string>& lines) 96 +QdfFixer::processLines(std::string const& input)
96 { 97 {
97 - static std::regex re_n_0_obj("^(\\d+) 0 obj\n$");  
98 - static std::regex re_xref("^xref\n$");  
99 - static std::regex re_stream("^stream\n$");  
100 - static std::regex re_endobj("^endobj\n$");  
101 - static std::regex re_type_objstm("/Type /ObjStm");  
102 - static std::regex re_type_xref("/Type /XRef");  
103 - static std::regex re_extends("/Extends (\\d+ 0 R)");  
104 - static std::regex re_ostream_obj("^%% Object stream: object (\\d+)");  
105 - static std::regex re_endstream("^endstream\n$");  
106 - static std::regex re_length_or_w("/(Length|W) ");  
107 - static std::regex re_size("/Size ");  
108 - static std::regex re_ignore_newline("^%QDF: ignore_newline\n$");  
109 - static std::regex re_num("^\\d+\n$");  
110 - static std::regex re_trailer("^trailer <<");  
111 - static std::regex re_size_n("^ /Size \\d+\n$");  
112 - static std::regex re_dict_end("^>>\n$"); 98 + static const std::regex re_n_0_obj("^(\\d+) 0 obj\n$");
  99 + static const std::regex re_xref("^xref\n$");
  100 + static const std::regex re_stream("^stream\n$");
  101 + static const std::regex re_endobj("^endobj\n$");
  102 + static const std::regex re_type_objstm("/Type /ObjStm");
  103 + static const std::regex re_type_xref("/Type /XRef");
  104 + static const std::regex re_extends("/Extends (\\d+ 0 R)");
  105 + static const std::regex re_ostream_obj("^%% Object stream: object (\\d+)");
  106 + static const std::regex re_endstream("^endstream\n$");
  107 + static const std::regex re_length_or_w("/(Length|W) ");
  108 + static const std::regex re_size("/Size ");
  109 + static const std::regex re_ignore_newline("^%QDF: ignore_newline\n$");
  110 + static const std::regex re_num("^\\d+\n$");
  111 + static const std::regex re_trailer("^trailer <<");
  112 + static const std::regex re_size_n("^ /Size \\d+\n$");
  113 + static const std::regex re_dict_end("^>>\n$");
  114 +
  115 + auto sv_diff = [](size_t i) {
  116 + return static_cast<std::string_view::difference_type>(i);
  117 + };
113 118
114 lineno = 0; 119 lineno = 0;
115 - for (auto const& line: lines) { 120 + bool more = true;
  121 + auto len_line = sv_diff(0);
  122 +
  123 + std::string_view line;
  124 + std::string_view input_view{input.data(), input.size()};
  125 + size_t offs = 0;
  126 +
  127 + auto b_line = input.cbegin();
  128 + std::smatch m;
  129 + auto const matches = [&m, &b_line, &len_line](std::regex const& r) {
  130 + return std::regex_search(b_line, b_line + len_line, m, r);
  131 + };
  132 +
  133 + while (more) {
116 ++lineno; 134 ++lineno;
117 last_offset = offset; 135 last_offset = offset;
118 - offset += QIntC::to_offset(line.length());  
119 - std::smatch m;  
120 - auto matches = [&m, &line](std::regex& r) {  
121 - return std::regex_search(line, m, r);  
122 - }; 136 + b_line += len_line;
  137 +
  138 + offs = input_view.find('\n');
  139 + if (offs == std::string::npos) {
  140 + more = false;
  141 + line = input_view;
  142 + } else {
  143 + offs++;
  144 + line = input_view.substr(0, offs);
  145 + input_view.remove_prefix(offs);
  146 + }
  147 + len_line = sv_diff(line.size());
  148 + offset += len_line;
  149 +
123 if (state == st_top) { 150 if (state == st_top) {
124 if (matches(re_n_0_obj)) { 151 if (matches(re_n_0_obj)) {
125 checkObjId(m[1].str()); 152 checkObjId(m[1].str());
@@ -392,17 +419,17 @@ realmain(int argc, char* argv[]) @@ -392,17 +419,17 @@ realmain(int argc, char* argv[])
392 } else if (argc == 2) { 419 } else if (argc == 2) {
393 filename = argv[1]; 420 filename = argv[1];
394 } 421 }
395 - std::list<std::string> lines; 422 + std::string input;
396 if (filename == 0) { 423 if (filename == 0) {
397 filename = "standard input"; 424 filename = "standard input";
398 QUtil::binary_stdin(); 425 QUtil::binary_stdin();
399 - lines = QUtil::read_lines_from_file(stdin, true); 426 + input = QUtil::read_file_into_string(stdin);
400 } else { 427 } else {
401 - lines = QUtil::read_lines_from_file(filename, true); 428 + input = QUtil::read_file_into_string(filename);
402 } 429 }
403 QUtil::binary_stdout(); 430 QUtil::binary_stdout();
404 QdfFixer qf(filename); 431 QdfFixer qf(filename);
405 - qf.processLines(lines); 432 + qf.processLines(input);
406 return 0; 433 return 0;
407 } 434 }
408 435