Commit ec9e310c9ea9cee8d9e16cad2a68f0ad096f3a4b

Authored by Jay Berkenbilt
1 parent 31372edc

Refactor QPDFTokenizer's inline image handling

Add a version of expectInlineImage that takes an input source and
searches for EI. This is in preparation for improving the way EI is
found. This commit just refactors the code without changing the
functionality and adds tests to make sure the old and new code behave
identically.
include/qpdf/Pl_QPDFTokenizer.hh
@@ -27,6 +27,7 @@ @@ -27,6 +27,7 @@
27 #include <qpdf/QPDFTokenizer.hh> 27 #include <qpdf/QPDFTokenizer.hh>
28 #include <qpdf/PointerHolder.hh> 28 #include <qpdf/PointerHolder.hh>
29 #include <qpdf/QPDFObjectHandle.hh> 29 #include <qpdf/QPDFObjectHandle.hh>
  30 +#include <qpdf/Pl_Buffer.hh>
30 31
31 // Tokenize the incoming text using QPDFTokenizer and pass the tokens 32 // Tokenize the incoming text using QPDFTokenizer and pass the tokens
32 // in turn to a QPDFObjectHandle::TokenFilter object. All bytes of 33 // in turn to a QPDFObjectHandle::TokenFilter object. All bytes of
@@ -56,9 +57,6 @@ class Pl_QPDFTokenizer: public Pipeline @@ -56,9 +57,6 @@ class Pl_QPDFTokenizer: public Pipeline
56 virtual void finish(); 57 virtual void finish();
57 58
58 private: 59 private:
59 - void processChar(char ch);  
60 - void checkUnread();  
61 -  
62 class Members 60 class Members
63 { 61 {
64 friend class Pl_QPDFTokenizer; 62 friend class Pl_QPDFTokenizer;
@@ -73,9 +71,7 @@ class Pl_QPDFTokenizer: public Pipeline @@ -73,9 +71,7 @@ class Pl_QPDFTokenizer: public Pipeline
73 71
74 QPDFObjectHandle::TokenFilter* filter; 72 QPDFObjectHandle::TokenFilter* filter;
75 QPDFTokenizer tokenizer; 73 QPDFTokenizer tokenizer;
76 - bool last_char_was_cr;  
77 - bool unread_char;  
78 - char char_to_unread; 74 + Pl_Buffer buf;
79 }; 75 };
80 PointerHolder<Members> m; 76 PointerHolder<Members> m;
81 }; 77 };
include/qpdf/QPDFTokenizer.hh
@@ -178,7 +178,15 @@ class QPDFTokenizer @@ -178,7 +178,15 @@ class QPDFTokenizer
178 // including the next EI token. After you call this method, the 178 // including the next EI token. After you call this method, the
179 // next call to readToken (or the token created next time getToken 179 // next call to readToken (or the token created next time getToken
180 // returns true) will either be tt_inline_image or tt_bad. This is 180 // returns true) will either be tt_inline_image or tt_bad. This is
181 - // the only way readToken returns a tt_inline_image token. 181 + // the only way readToken returns a tt_inline_image token. The
  182 + // version of this method that takes a PointerHolder<InputSource>
  183 + // does a better job of locating the end of the inline image and
  184 + // should be used whenever the input source is available. It
  185 + // preserves both tell() and getLastOffset(). The version without
  186 + // the input source will always end the inline image the first
  187 + // time it sees something that looks like an EI operator.
  188 + QPDF_DLL
  189 + void expectInlineImage(PointerHolder<InputSource> input);
182 QPDF_DLL 190 QPDF_DLL
183 void expectInlineImage(); 191 void expectInlineImage();
184 192
@@ -223,6 +231,7 @@ class QPDFTokenizer @@ -223,6 +231,7 @@ class QPDFTokenizer
223 std::string error_message; 231 std::string error_message;
224 bool unread_char; 232 bool unread_char;
225 char char_to_unread; 233 char char_to_unread;
  234 + size_t inline_image_bytes;
226 235
227 // State for strings 236 // State for strings
228 int string_depth; 237 int string_depth;
libqpdf/Pl_QPDFTokenizer.cc
1 #include <qpdf/Pl_QPDFTokenizer.hh> 1 #include <qpdf/Pl_QPDFTokenizer.hh>
2 #include <qpdf/QTC.hh> 2 #include <qpdf/QTC.hh>
  3 +#include <qpdf/QUtil.hh>
  4 +#include <qpdf/BufferInputSource.hh>
3 #include <stdexcept> 5 #include <stdexcept>
4 #include <string.h> 6 #include <string.h>
5 7
6 Pl_QPDFTokenizer::Members::Members() : 8 Pl_QPDFTokenizer::Members::Members() :
7 filter(0), 9 filter(0),
8 - last_char_was_cr(false),  
9 - unread_char(false),  
10 - char_to_unread('\0') 10 + buf("tokenizer buffer")
11 { 11 {
12 } 12 }
13 13
@@ -33,61 +33,36 @@ Pl_QPDFTokenizer::~Pl_QPDFTokenizer() @@ -33,61 +33,36 @@ Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
33 } 33 }
34 34
35 void 35 void
36 -Pl_QPDFTokenizer::processChar(char ch) 36 +Pl_QPDFTokenizer::write(unsigned char* data, size_t len)
37 { 37 {
38 - this->m->tokenizer.presentCharacter(ch);  
39 - QPDFTokenizer::Token token;  
40 - if (this->m->tokenizer.getToken(  
41 - token, this->m->unread_char, this->m->char_to_unread))  
42 - {  
43 - this->m->filter->handleToken(token);  
44 - if ((token.getType() == QPDFTokenizer::tt_word) &&  
45 - (token.getValue() == "ID"))  
46 - {  
47 - QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");  
48 - this->m->tokenizer.expectInlineImage();  
49 - }  
50 - }  
51 -}  
52 -  
53 -  
54 -void  
55 -Pl_QPDFTokenizer::checkUnread()  
56 -{  
57 - if (this->m->unread_char)  
58 - {  
59 - processChar(this->m->char_to_unread);  
60 - if (this->m->unread_char)  
61 - {  
62 - throw std::logic_error(  
63 - "INTERNAL ERROR: unread_char still true after processing "  
64 - "unread character");  
65 - }  
66 - }  
67 -}  
68 -  
69 -void  
70 -Pl_QPDFTokenizer::write(unsigned char* buf, size_t len)  
71 -{  
72 - checkUnread();  
73 - for (size_t i = 0; i < len; ++i)  
74 - {  
75 - processChar(buf[i]);  
76 - checkUnread();  
77 - } 38 + this->m->buf.write(data, len);
78 } 39 }
79 40
80 void 41 void
81 Pl_QPDFTokenizer::finish() 42 Pl_QPDFTokenizer::finish()
82 { 43 {
83 - this->m->tokenizer.presentEOF();  
84 - QPDFTokenizer::Token token;  
85 - if (this->m->tokenizer.getToken(  
86 - token, this->m->unread_char, this->m->char_to_unread)) 44 + this->m->buf.finish();
  45 + PointerHolder<InputSource> input =
  46 + new BufferInputSource("tokenizer data",
  47 + this->m->buf.getBuffer(), true);
  48 +
  49 + while (true)
87 { 50 {
  51 + QPDFTokenizer::Token token = this->m->tokenizer.readToken(
  52 + input, "offset " + QUtil::int_to_string(input->tell()),
  53 + true);
88 this->m->filter->handleToken(token); 54 this->m->filter->handleToken(token);
  55 + if (token.getType() == QPDFTokenizer::tt_eof)
  56 + {
  57 + break;
  58 + }
  59 + else if ((token.getType() == QPDFTokenizer::tt_word) &&
  60 + (token.getValue() == "ID"))
  61 + {
  62 + QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
  63 + this->m->tokenizer.expectInlineImage(input);
  64 + }
89 } 65 }
90 -  
91 this->m->filter->handleEOF(); 66 this->m->filter->handleEOF();
92 QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline( 67 QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline(
93 m->filter, 0); 68 m->filter, 0);
libqpdf/QPDFObjectHandle.cc
@@ -1558,7 +1558,7 @@ QPDFObjectHandle::parseContentStream_data( @@ -1558,7 +1558,7 @@ QPDFObjectHandle::parseContentStream_data(
1558 // terminated the token. Read until end of inline image. 1558 // terminated the token. Read until end of inline image.
1559 char ch; 1559 char ch;
1560 input->read(&ch, 1); 1560 input->read(&ch, 1);
1561 - tokenizer.expectInlineImage(); 1561 + tokenizer.expectInlineImage(input);
1562 QPDFTokenizer::Token t = 1562 QPDFTokenizer::Token t =
1563 tokenizer.readToken(input, description, true); 1563 tokenizer.readToken(input, description, true);
1564 if (t.getType() == QPDFTokenizer::tt_bad) 1564 if (t.getType() == QPDFTokenizer::tt_bad)
libqpdf/QPDFTokenizer.cc
@@ -13,6 +13,79 @@ @@ -13,6 +13,79 @@
13 #include <string.h> 13 #include <string.h>
14 #include <cstdlib> 14 #include <cstdlib>
15 15
  16 +static bool is_delimiter(char ch)
  17 +{
  18 + return (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0);
  19 +}
  20 +
  21 +class QPDFWordTokenFinder: public InputSource::Finder
  22 +{
  23 + public:
  24 + QPDFWordTokenFinder(PointerHolder<InputSource> is,
  25 + std::string const& str) :
  26 + is(is),
  27 + str(str)
  28 + {
  29 + }
  30 + virtual ~QPDFWordTokenFinder()
  31 + {
  32 + }
  33 + virtual bool check();
  34 +
  35 + private:
  36 + PointerHolder<InputSource> is;
  37 + std::string str;
  38 +};
  39 +
  40 +bool
  41 +QPDFWordTokenFinder::check()
  42 +{
  43 + // Find a word token matching the given string, preceded by a
  44 + // delimiter, and followed by a delimiter or EOF.
  45 + QPDFTokenizer tokenizer;
  46 + QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
  47 + qpdf_offset_t pos = is->tell();
  48 + if (! (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, str)))
  49 + {
  50 +/// QTC::TC("qpdf", "QPDFTokenizer finder found wrong word");
  51 + return false;
  52 + }
  53 + qpdf_offset_t token_start = is->getLastOffset();
  54 + char next;
  55 + bool next_okay = false;
  56 + if (is->read(&next, 1) == 0)
  57 + {
  58 + QTC::TC("qpdf", "QPDFTokenizer inline image at EOF");
  59 + next_okay = true;
  60 + }
  61 + else
  62 + {
  63 + next_okay = is_delimiter(next);
  64 + }
  65 + is->seek(pos, SEEK_SET);
  66 + if (! next_okay)
  67 + {
  68 +/// QTC::TC("qpdf", "QPDFTokenizer finder word not followed by delimiter");
  69 + return false;
  70 + }
  71 + if (token_start == 0)
  72 + {
  73 + // Can't actually happen...we never start the search at the
  74 + // beginning of the input.
  75 + return false;
  76 + }
  77 + is->seek(token_start - 1, SEEK_SET);
  78 + char prev;
  79 + bool prev_okay = ((is->read(&prev, 1) == 1) && is_delimiter(prev));
  80 + is->seek(pos, SEEK_SET);
  81 + if (! prev_okay)
  82 + {
  83 +/// QTC::TC("qpdf", "QPDFTokenizer finder word not preceded by delimiter");
  84 + return false;
  85 + }
  86 + return true;
  87 +}
  88 +
16 QPDFTokenizer::Members::Members() : 89 QPDFTokenizer::Members::Members() :
17 pound_special_in_name(true), 90 pound_special_in_name(true),
18 allow_eof(false), 91 allow_eof(false),
@@ -31,6 +104,7 @@ QPDFTokenizer::Members::reset() @@ -31,6 +104,7 @@ QPDFTokenizer::Members::reset()
31 error_message = ""; 104 error_message = "";
32 unread_char = false; 105 unread_char = false;
33 char_to_unread = '\0'; 106 char_to_unread = '\0';
  107 + inline_image_bytes = 0;
34 string_depth = 0; 108 string_depth = 0;
35 string_ignoring_newline = false; 109 string_ignoring_newline = false;
36 last_char_was_bs = false; 110 last_char_was_bs = false;
@@ -91,7 +165,7 @@ QPDFTokenizer::isSpace(char ch) @@ -91,7 +165,7 @@ QPDFTokenizer::isSpace(char ch)
91 bool 165 bool
92 QPDFTokenizer::isDelimiter(char ch) 166 QPDFTokenizer::isDelimiter(char ch)
93 { 167 {
94 - return (strchr(" \t\n\v\f\r()<>[]{}/%", ch) != 0); 168 + return is_delimiter(ch);
95 } 169 }
96 170
97 void 171 void
@@ -470,12 +544,21 @@ QPDFTokenizer::presentCharacter(char ch) @@ -470,12 +544,21 @@ QPDFTokenizer::presentCharacter(char ch)
470 { 544 {
471 this->m->val += ch; 545 this->m->val += ch;
472 size_t len = this->m->val.length(); 546 size_t len = this->m->val.length();
473 - if ((len >= 4) &&  
474 - isDelimiter(this->m->val.at(len-4)) &&  
475 - (this->m->val.at(len-3) == 'E') &&  
476 - (this->m->val.at(len-2) == 'I') &&  
477 - isDelimiter(this->m->val.at(len-1))) 547 + if (len == this->m->inline_image_bytes)
  548 + {
  549 + QTC::TC("qpdf", "QPDFTokenizer found EI by byte count");
  550 + this->m->type = tt_inline_image;
  551 + this->m->inline_image_bytes = 0;
  552 + this->m->state = st_token_ready;
  553 + }
  554 + else if ((this->m->inline_image_bytes == 0) &&
  555 + (len >= 4) &&
  556 + isDelimiter(this->m->val.at(len-4)) &&
  557 + (this->m->val.at(len-3) == 'E') &&
  558 + (this->m->val.at(len-2) == 'I') &&
  559 + isDelimiter(this->m->val.at(len-1)))
478 { 560 {
  561 + QTC::TC("qpdf", "QPDFTokenizer found EI the old way");
479 this->m->val.erase(len - 1); 562 this->m->val.erase(len - 1);
480 this->m->type = tt_inline_image; 563 this->m->type = tt_inline_image;
481 this->m->unread_char = true; 564 this->m->unread_char = true;
@@ -562,7 +645,7 @@ QPDFTokenizer::presentEOF() @@ -562,7 +645,7 @@ QPDFTokenizer::presentEOF()
562 (this->m->val.at(len-2) == 'E') && 645 (this->m->val.at(len-2) == 'E') &&
563 (this->m->val.at(len-1) == 'I')) 646 (this->m->val.at(len-1) == 'I'))
564 { 647 {
565 - QTC::TC("qpdf", "QPDFTokenizer inline image at EOF"); 648 + QTC::TC("qpdf", "QPDFTokenizer inline image at EOF the old way");
566 this->m->type = tt_inline_image; 649 this->m->type = tt_inline_image;
567 this->m->state = st_token_ready; 650 this->m->state = st_token_ready;
568 } 651 }
@@ -598,6 +681,26 @@ QPDFTokenizer::presentEOF() @@ -598,6 +681,26 @@ QPDFTokenizer::presentEOF()
598 void 681 void
599 QPDFTokenizer::expectInlineImage() 682 QPDFTokenizer::expectInlineImage()
600 { 683 {
  684 + expectInlineImage(PointerHolder<InputSource>());
  685 +}
  686 +
  687 +void
  688 +QPDFTokenizer::expectInlineImage(PointerHolder<InputSource> input)
  689 +{
  690 + if (input.getPointer())
  691 + {
  692 + qpdf_offset_t last_offset = input->getLastOffset();
  693 + qpdf_offset_t pos = input->tell();
  694 +
  695 + QPDFWordTokenFinder f(input, "EI");
  696 + if (input->findFirst("EI", pos, 0, f))
  697 + {
  698 + this->m->inline_image_bytes = input->tell() - pos;
  699 + }
  700 +
  701 + input->seek(pos, SEEK_SET);
  702 + input->setLastOffset(last_offset);
  703 + }
601 if (this->m->state != st_top) 704 if (this->m->state != st_top)
602 { 705 {
603 throw std::logic_error("QPDFTokenizer::expectInlineImage called" 706 throw std::logic_error("QPDFTokenizer::expectInlineImage called"
qpdf/qpdf.testcov
@@ -430,3 +430,6 @@ QPDFPageObjectHelper copy shared attribute 0 @@ -430,3 +430,6 @@ QPDFPageObjectHelper copy shared attribute 0
430 qpdf from_nr from repeat_nr 0 430 qpdf from_nr from repeat_nr 0
431 QPDF resolve duplicated page object 0 431 QPDF resolve duplicated page object 0
432 QPDF handle direct page object 0 432 QPDF handle direct page object 0
  433 +QPDFTokenizer found EI the old way 0
  434 +QPDFTokenizer found EI by byte count 0
  435 +QPDFTokenizer inline image at EOF the old way 0
qpdf/qtest/qpdf.test
@@ -694,7 +694,7 @@ $td->runtest("check pass1 file", @@ -694,7 +694,7 @@ $td->runtest("check pass1 file",
694 show_ntests(); 694 show_ntests();
695 # ---------- 695 # ----------
696 $td->notify("--- Tokenizer ---"); 696 $td->notify("--- Tokenizer ---");
697 -$n_tests += 4; 697 +$n_tests += 5;
698 698
699 $td->runtest("tokenizer with no ignorable", 699 $td->runtest("tokenizer with no ignorable",
700 {$td->COMMAND => "test_tokenizer -no-ignorable tokens.pdf"}, 700 {$td->COMMAND => "test_tokenizer -no-ignorable tokens.pdf"},
@@ -706,6 +706,11 @@ $td->runtest("tokenizer", @@ -706,6 +706,11 @@ $td->runtest("tokenizer",
706 {$td->FILE => "tokens.out", $td->EXIT_STATUS => 0}, 706 {$td->FILE => "tokens.out", $td->EXIT_STATUS => 0},
707 $td->NORMALIZE_NEWLINES); 707 $td->NORMALIZE_NEWLINES);
708 708
  709 +$td->runtest("tokenizer with old inline image code",
  710 + {$td->COMMAND => "test_tokenizer -old-ei tokens.pdf"},
  711 + {$td->FILE => "tokens.out", $td->EXIT_STATUS => 0},
  712 + $td->NORMALIZE_NEWLINES);
  713 +
709 $td->runtest("tokenizer with max_len", 714 $td->runtest("tokenizer with max_len",
710 {$td->COMMAND => "test_tokenizer -maxlen 50 tokens.pdf"}, 715 {$td->COMMAND => "test_tokenizer -maxlen 50 tokens.pdf"},
711 {$td->FILE => "tokens-maxlen.out", $td->EXIT_STATUS => 0}, 716 {$td->FILE => "tokens-maxlen.out", $td->EXIT_STATUS => 0},
qpdf/test_tokenizer.cc
@@ -16,7 +16,7 @@ static char const* whoami = 0; @@ -16,7 +16,7 @@ static char const* whoami = 0;
16 void usage() 16 void usage()
17 { 17 {
18 std::cerr << "Usage: " << whoami 18 std::cerr << "Usage: " << whoami
19 - << " [-maxlen len | -no-ignorable] filename" 19 + << " [-maxlen len | -no-ignorable | -old-ei] filename"
20 << std::endl; 20 << std::endl;
21 exit(2); 21 exit(2);
22 } 22 }
@@ -132,7 +132,7 @@ try_skipping(QPDFTokenizer& tokenizer, PointerHolder<InputSource> is, @@ -132,7 +132,7 @@ try_skipping(QPDFTokenizer& tokenizer, PointerHolder<InputSource> is,
132 static void 132 static void
133 dump_tokens(PointerHolder<InputSource> is, std::string const& label, 133 dump_tokens(PointerHolder<InputSource> is, std::string const& label,
134 size_t max_len, bool include_ignorable, 134 size_t max_len, bool include_ignorable,
135 - bool skip_streams, bool skip_inline_images) 135 + bool skip_streams, bool skip_inline_images, bool old_ei)
136 { 136 {
137 Finder f1(is, "endstream"); 137 Finder f1(is, "endstream");
138 std::cout << "--- BEGIN " << label << " ---" << std::endl; 138 std::cout << "--- BEGIN " << label << " ---" << std::endl;
@@ -183,7 +183,14 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label, @@ -183,7 +183,14 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label,
183 else if (skip_inline_images && 183 else if (skip_inline_images &&
184 (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID"))) 184 (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID")))
185 { 185 {
186 - tokenizer.expectInlineImage(); 186 + if (old_ei)
  187 + {
  188 + tokenizer.expectInlineImage();
  189 + }
  190 + else
  191 + {
  192 + tokenizer.expectInlineImage(is);
  193 + }
187 inline_image_offset = is->tell(); 194 inline_image_offset = is->tell();
188 } 195 }
189 else if (token.getType() == QPDFTokenizer::tt_eof) 196 else if (token.getType() == QPDFTokenizer::tt_eof)
@@ -195,7 +202,7 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label, @@ -195,7 +202,7 @@ dump_tokens(PointerHolder<InputSource> is, std::string const& label,
195 } 202 }
196 203
197 static void process(char const* filename, bool include_ignorable, 204 static void process(char const* filename, bool include_ignorable,
198 - size_t max_len) 205 + size_t max_len, bool old_ei)
199 { 206 {
200 PointerHolder<InputSource> is; 207 PointerHolder<InputSource> is;
201 208
@@ -203,7 +210,7 @@ static void process(char const* filename, bool include_ignorable, @@ -203,7 +210,7 @@ static void process(char const* filename, bool include_ignorable,
203 FileInputSource* fis = new FileInputSource(); 210 FileInputSource* fis = new FileInputSource();
204 fis->setFilename(filename); 211 fis->setFilename(filename);
205 is = fis; 212 is = fis;
206 - dump_tokens(is, "FILE", max_len, include_ignorable, true, false); 213 + dump_tokens(is, "FILE", max_len, include_ignorable, true, false, false);
207 214
208 // Tokenize content streams, skipping inline images 215 // Tokenize content streams, skipping inline images
209 QPDF qpdf; 216 QPDF qpdf;
@@ -222,7 +229,7 @@ static void process(char const* filename, bool include_ignorable, @@ -222,7 +229,7 @@ static void process(char const* filename, bool include_ignorable,
222 "content data", content_data.getPointer()); 229 "content data", content_data.getPointer());
223 is = bis; 230 is = bis;
224 dump_tokens(is, "PAGE " + QUtil::int_to_string(pageno), 231 dump_tokens(is, "PAGE " + QUtil::int_to_string(pageno),
225 - max_len, include_ignorable, false, true); 232 + max_len, include_ignorable, false, true, old_ei);
226 } 233 }
227 234
228 // Tokenize object streams 235 // Tokenize object streams
@@ -241,7 +248,7 @@ static void process(char const* filename, bool include_ignorable, @@ -241,7 +248,7 @@ static void process(char const* filename, bool include_ignorable,
241 is = bis; 248 is = bis;
242 dump_tokens(is, "OBJECT STREAM " + 249 dump_tokens(is, "OBJECT STREAM " +
243 QUtil::int_to_string((*iter).getObjectID()), 250 QUtil::int_to_string((*iter).getObjectID()),
244 - max_len, include_ignorable, false, false); 251 + max_len, include_ignorable, false, false, false);
245 } 252 }
246 } 253 }
247 } 254 }
@@ -266,6 +273,7 @@ int main(int argc, char* argv[]) @@ -266,6 +273,7 @@ int main(int argc, char* argv[])
266 char const* filename = 0; 273 char const* filename = 0;
267 size_t max_len = 0; 274 size_t max_len = 0;
268 bool include_ignorable = true; 275 bool include_ignorable = true;
  276 + bool old_ei = false;
269 for (int i = 1; i < argc; ++i) 277 for (int i = 1; i < argc; ++i)
270 { 278 {
271 if (argv[i][0] == '-') 279 if (argv[i][0] == '-')
@@ -282,6 +290,10 @@ int main(int argc, char* argv[]) @@ -282,6 +290,10 @@ int main(int argc, char* argv[])
282 { 290 {
283 include_ignorable = false; 291 include_ignorable = false;
284 } 292 }
  293 + else if (strcmp(argv[i], "-old-ei") == 0)
  294 + {
  295 + old_ei = true;
  296 + }
285 else 297 else
286 { 298 {
287 usage(); 299 usage();
@@ -303,7 +315,7 @@ int main(int argc, char* argv[]) @@ -303,7 +315,7 @@ int main(int argc, char* argv[])
303 315
304 try 316 try
305 { 317 {
306 - process(filename, include_ignorable, max_len); 318 + process(filename, include_ignorable, max_len, old_ei);
307 } 319 }
308 catch (std::exception& e) 320 catch (std::exception& e)
309 { 321 {