Commit 34a6f8938f0e6d55eeb9f37c0ef23e02fec88932

Authored by m-holger
Committed by Jay Berkenbilt
1 parent dca70f13

Add methods QPDFTokenizer::Token::isWord

examples/pdf-filter-tokens.cc
@@ -139,8 +139,7 @@ ColorToGray::handleToken(QPDFTokenizer::Token const& token) @@ -139,8 +139,7 @@ ColorToGray::handleToken(QPDFTokenizer::Token const& token)
139 QPDFTokenizer::token_type_e token_type = token.getType(); 139 QPDFTokenizer::token_type_e token_type = token.getType();
140 if (!isIgnorable(token_type)) { 140 if (!isIgnorable(token_type)) {
141 this->stack.push_back(token); 141 this->stack.push_back(token);
142 - if ((this->stack.size() == 4) &&  
143 - (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "rg")) && 142 + if ((this->stack.size() == 4) && token.isWord("rg") &&
144 (isNumeric(this->stack.at(0).getType())) && 143 (isNumeric(this->stack.at(0).getType())) &&
145 (isNumeric(this->stack.at(1).getType())) && 144 (isNumeric(this->stack.at(1).getType())) &&
146 (isNumeric(this->stack.at(2).getType()))) { 145 (isNumeric(this->stack.at(2).getType()))) {
include/qpdf/QPDFTokenizer.hh
@@ -112,6 +112,16 @@ class QPDFTokenizer @@ -112,6 +112,16 @@ class QPDFTokenizer
112 { 112 {
113 return this->type == tt_integer; 113 return this->type == tt_integer;
114 } 114 }
  115 + bool
  116 + isWord() const
  117 + {
  118 + return this->type == tt_word;
  119 + }
  120 + bool
  121 + isWord(std::string const& value) const
  122 + {
  123 + return this->type == tt_word && this->value == value;
  124 + }
115 125
116 private: 126 private:
117 token_type_e type; 127 token_type_e type;
libqpdf/Pl_QPDFTokenizer.cc
@@ -53,9 +53,7 @@ Pl_QPDFTokenizer::finish() @@ -53,9 +53,7 @@ Pl_QPDFTokenizer::finish()
53 this->m->filter->handleToken(token); 53 this->m->filter->handleToken(token);
54 if (token.getType() == QPDFTokenizer::tt_eof) { 54 if (token.getType() == QPDFTokenizer::tt_eof) {
55 break; 55 break;
56 - } else if (  
57 - (token.getType() == QPDFTokenizer::tt_word) &&  
58 - (token.getValue() == "ID")) { 56 + } else if (token.isWord("ID")) {
59 // Read the space after the ID. 57 // Read the space after the ID.
60 char ch = ' '; 58 char ch = ' ';
61 input->read(&ch, 1); 59 input->read(&ch, 1);
libqpdf/QPDF.cc
@@ -459,13 +459,11 @@ QPDF::findHeader() @@ -459,13 +459,11 @@ QPDF::findHeader()
459 bool 459 bool
460 QPDF::findStartxref() 460 QPDF::findStartxref()
461 { 461 {
462 - QPDFTokenizer::Token t = readToken(this->m->file);  
463 - if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "startxref")) {  
464 - if (readToken(this->m->file).isInteger()) {  
465 - // Position in front of offset token  
466 - this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET);  
467 - return true;  
468 - } 462 + if (readToken(m->file).isWord("startxref") &&
  463 + readToken(m->file).isInteger()) {
  464 + // Position in front of offset token
  465 + this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET);
  466 + return true;
469 } 467 }
470 return false; 468 return false;
471 } 469 }
@@ -613,16 +611,13 @@ QPDF::reconstruct_xref(QPDFExc& e) @@ -613,16 +611,13 @@ QPDF::reconstruct_xref(QPDFExc& e)
613 // containing this token 611 // containing this token
614 } else if (t1.isInteger()) { 612 } else if (t1.isInteger()) {
615 QPDFTokenizer::Token t2 = readToken(this->m->file, MAX_LEN); 613 QPDFTokenizer::Token t2 = readToken(this->m->file, MAX_LEN);
616 - QPDFTokenizer::Token t3 = readToken(this->m->file, MAX_LEN);  
617 if ((t2.isInteger()) && 614 if ((t2.isInteger()) &&
618 - (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"))) { 615 + (readToken(m->file, MAX_LEN).isWord("obj"))) {
619 int obj = QUtil::string_to_int(t1.getValue().c_str()); 616 int obj = QUtil::string_to_int(t1.getValue().c_str());
620 int gen = QUtil::string_to_int(t2.getValue().c_str()); 617 int gen = QUtil::string_to_int(t2.getValue().c_str());
621 insertXrefEntry(obj, 1, token_start, gen, true); 618 insertXrefEntry(obj, 1, token_start, gen, true);
622 } 619 }
623 - } else if (  
624 - (!this->m->trailer.isInitialized()) &&  
625 - (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer"))) { 620 + } else if (!this->m->trailer.isInitialized() && t1.isWord("trailer")) {
626 QPDFObjectHandle t = 621 QPDFObjectHandle t =
627 readObject(this->m->file, "trailer", QPDFObjGen(), false); 622 readObject(this->m->file, "trailer", QPDFObjGen(), false);
628 if (!t.isDictionary()) { 623 if (!t.isDictionary()) {
@@ -922,8 +917,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) @@ -922,8 +917,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
922 } 917 }
923 } 918 }
924 qpdf_offset_t pos = this->m->file->tell(); 919 qpdf_offset_t pos = this->m->file->tell();
925 - QPDFTokenizer::Token t = readToken(this->m->file);  
926 - if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer")) { 920 + if (readToken(m->file).isWord("trailer")) {
927 done = true; 921 done = true;
928 } else { 922 } else {
929 this->m->file->seek(pos, SEEK_SET); 923 this->m->file->seek(pos, SEEK_SET);
@@ -1431,8 +1425,7 @@ QPDF::readObject( @@ -1431,8 +1425,7 @@ QPDF::readObject(
1431 } else if (object.isDictionary() && (!in_object_stream)) { 1425 } else if (object.isDictionary() && (!in_object_stream)) {
1432 // check for stream 1426 // check for stream
1433 qpdf_offset_t cur_offset = input->tell(); 1427 qpdf_offset_t cur_offset = input->tell();
1434 - if (readToken(input) ==  
1435 - QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream")) { 1428 + if (readToken(input).isWord("stream")) {
1436 // The PDF specification states that the word "stream" 1429 // The PDF specification states that the word "stream"
1437 // should be followed by either a carriage return and 1430 // should be followed by either a carriage return and
1438 // a newline or by a newline alone. It specifically 1431 // a newline or by a newline alone. It specifically
@@ -1523,9 +1516,7 @@ QPDF::readObject( @@ -1523,9 +1516,7 @@ QPDF::readObject(
1523 // Seek in two steps to avoid potential integer overflow 1516 // Seek in two steps to avoid potential integer overflow
1524 input->seek(stream_offset, SEEK_SET); 1517 input->seek(stream_offset, SEEK_SET);
1525 input->seek(toO(length), SEEK_CUR); 1518 input->seek(toO(length), SEEK_CUR);
1526 - if (!(readToken(input) ==  
1527 - QPDFTokenizer::Token(  
1528 - QPDFTokenizer::tt_word, "endstream"))) { 1519 + if (!readToken(input).isWord("endstream")) {
1529 QTC::TC("qpdf", "QPDF missing endstream"); 1520 QTC::TC("qpdf", "QPDF missing endstream");
1530 throw damagedPDF( 1521 throw damagedPDF(
1531 input, input->getLastOffset(), "expected endstream"); 1522 input, input->getLastOffset(), "expected endstream");
@@ -1556,9 +1547,8 @@ bool @@ -1556,9 +1547,8 @@ bool
1556 QPDF::findEndstream() 1547 QPDF::findEndstream()
1557 { 1548 {
1558 // Find endstream or endobj. Position the input at that token. 1549 // Find endstream or endobj. Position the input at that token.
1559 - QPDFTokenizer::Token t = readToken(this->m->file, 20);  
1560 - if ((t.getType() == QPDFTokenizer::tt_word) &&  
1561 - ((t.getValue() == "endobj") || (t.getValue() == "endstream"))) { 1550 + auto t = readToken(m->file, 20);
  1551 + if (t.isWord("endobj") || t.isWord("endstream")) {
1562 this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET); 1552 this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET);
1563 return true; 1553 return true;
1564 } 1554 }
@@ -1682,8 +1672,8 @@ QPDF::readObjectAtOffset( @@ -1682,8 +1672,8 @@ QPDF::readObjectAtOffset(
1682 QPDFTokenizer::Token tobj = readToken(this->m->file); 1672 QPDFTokenizer::Token tobj = readToken(this->m->file);
1683 1673
1684 bool objidok = tobjid.isInteger(); 1674 bool objidok = tobjid.isInteger();
1685 - int genok = tgen.isInteger();  
1686 - int objok = (tobj == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")); 1675 + bool genok = tgen.isInteger();
  1676 + bool objok = tobj.isWord("obj");
1687 1677
1688 QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0); 1678 QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0);
1689 QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0); 1679 QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0);
@@ -1743,8 +1733,7 @@ QPDF::readObjectAtOffset( @@ -1743,8 +1733,7 @@ QPDF::readObjectAtOffset(
1743 1733
1744 QPDFObjectHandle oh = readObject(this->m->file, description, og, false); 1734 QPDFObjectHandle oh = readObject(this->m->file, description, og, false);
1745 1735
1746 - if (!(readToken(this->m->file) ==  
1747 - QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))) { 1736 + if (!readToken(this->m->file).isWord("endobj")) {
1748 QTC::TC("qpdf", "QPDF err expected endobj"); 1737 QTC::TC("qpdf", "QPDF err expected endobj");
1749 warn(damagedPDF("expected endobj")); 1738 warn(damagedPDF("expected endobj"));
1750 } 1739 }
libqpdf/QPDFFormFieldObjectHelper.cc
@@ -556,7 +556,7 @@ ValueSetter::handleToken(QPDFTokenizer::Token const& token) @@ -556,7 +556,7 @@ ValueSetter::handleToken(QPDFTokenizer::Token const& token)
556 switch (state) { 556 switch (state) {
557 case st_top: 557 case st_top:
558 writeToken(token); 558 writeToken(token);
559 - if ((ttype == QPDFTokenizer::tt_word) && (value == "BMC")) { 559 + if (token.isWord("BMC")) {
560 state = st_bmc; 560 state = st_bmc;
561 } 561 }
562 break; 562 break;
@@ -571,7 +571,7 @@ ValueSetter::handleToken(QPDFTokenizer::Token const& token) @@ -571,7 +571,7 @@ ValueSetter::handleToken(QPDFTokenizer::Token const& token)
571 // fall through to emc 571 // fall through to emc
572 572
573 case st_emc: 573 case st_emc:
574 - if ((ttype == QPDFTokenizer::tt_word) && (value == "EMC")) { 574 + if (token.isWord("EMC")) {
575 do_replace = true; 575 do_replace = true;
576 state = st_end; 576 state = st_end;
577 } 577 }
@@ -751,7 +751,7 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token) @@ -751,7 +751,7 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token)
751 break; 751 break;
752 752
753 case QPDFTokenizer::tt_word: 753 case QPDFTokenizer::tt_word:
754 - if (value == "Tf") { 754 + if (token.isWord("Tf")) {
755 if ((last_num > 1.0) && (last_num < 1000.0)) { 755 if ((last_num > 1.0) && (last_num < 1000.0)) {
756 // These ranges are arbitrary but keep us from doing 756 // These ranges are arbitrary but keep us from doing
757 // insane things or suffering from over/underflow 757 // insane things or suffering from over/underflow
libqpdf/QPDFPageObjectHelper.cc
@@ -210,18 +210,16 @@ InlineImageTracker::handleToken(QPDFTokenizer::Token const& token) @@ -210,18 +210,16 @@ InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
210 writeToken(token); 210 writeToken(token);
211 state = st_top; 211 state = st_top;
212 } 212 }
213 - } else if (  
214 - token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID")) { 213 + } else if (token.isWord("ID")) {
215 bi_str += token.getValue(); 214 bi_str += token.getValue();
216 dict_str += " >>"; 215 dict_str += " >>";
217 - } else if (  
218 - token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "EI")) { 216 + } else if (token.isWord("EI")) {
219 state = st_top; 217 state = st_top;
220 } else { 218 } else {
221 bi_str += token.getRawValue(); 219 bi_str += token.getRawValue();
222 dict_str += token.getRawValue(); 220 dict_str += token.getRawValue();
223 } 221 }
224 - } else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "BI")) { 222 + } else if (token.isWord("BI")) {
225 bi_str = token.getValue(); 223 bi_str = token.getValue();
226 dict_str = "<< "; 224 dict_str = "<< ";
227 state = st_bi; 225 state = st_bi;
libqpdf/QPDFTokenizer.cc
@@ -892,7 +892,7 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) @@ -892,7 +892,7 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
892 okay = true; 892 okay = true;
893 } else if (type == tt_bad) { 893 } else if (type == tt_bad) {
894 found_bad = true; 894 found_bad = true;
895 - } else if (type == tt_word) { 895 + } else if (t.isWord()) {
896 // The qpdf tokenizer lumps alphabetic and otherwise 896 // The qpdf tokenizer lumps alphabetic and otherwise
897 // uncategorized characters into "words". We recognize 897 // uncategorized characters into "words". We recognize
898 // strings of alphabetic characters as potential valid 898 // strings of alphabetic characters as potential valid
libqpdf/QPDF_linearization.cc
@@ -121,12 +121,9 @@ QPDF::isLinearized() @@ -121,12 +121,9 @@ QPDF::isLinearized()
121 } 121 }
122 122
123 QPDFTokenizer::Token t1 = readToken(this->m->file); 123 QPDFTokenizer::Token t1 = readToken(this->m->file);
124 - QPDFTokenizer::Token t2 = readToken(this->m->file);  
125 - QPDFTokenizer::Token t3 = readToken(this->m->file);  
126 - QPDFTokenizer::Token t4 = readToken(this->m->file);  
127 - if (t1.isInteger() && t2.isInteger() &&  
128 - (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")) &&  
129 - (t4.getType() == QPDFTokenizer::tt_dict_open)) { 124 + if (t1.isInteger() && readToken(m->file).isInteger() &&
  125 + readToken(m->file).isWord("obj") &&
  126 + (readToken(m->file).getType() == QPDFTokenizer::tt_dict_open)) {
130 lindict_obj = toI(QUtil::string_to_ll(t1.getValue().c_str())); 127 lindict_obj = toI(QUtil::string_to_ll(t1.getValue().c_str()));
131 } 128 }
132 } 129 }
libtests/input_source.cc
@@ -37,9 +37,9 @@ Finder::check() @@ -37,9 +37,9 @@ Finder::check()
37 { 37 {
38 QPDFTokenizer tokenizer; 38 QPDFTokenizer tokenizer;
39 QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true); 39 QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
40 - if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "potato")) { 40 + if (t.isWord("potato")) {
41 t = tokenizer.readToken(is, "finder", true); 41 t = tokenizer.readToken(is, "finder", true);
42 - return (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, after)); 42 + return t.isWord(after);
43 } 43 }
44 return false; 44 return false;
45 } 45 }