Commit 34a6f8938f0e6d55eeb9f37c0ef23e02fec88932

Authored by m-holger
Committed by Jay Berkenbilt
1 parent dca70f13

Add methods QPDFTokenizer::Token::isWord

examples/pdf-filter-tokens.cc
... ... @@ -139,8 +139,7 @@ ColorToGray::handleToken(QPDFTokenizer::Token const& token)
139 139 QPDFTokenizer::token_type_e token_type = token.getType();
140 140 if (!isIgnorable(token_type)) {
141 141 this->stack.push_back(token);
142   - if ((this->stack.size() == 4) &&
143   - (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "rg")) &&
  142 + if ((this->stack.size() == 4) && token.isWord("rg") &&
144 143 (isNumeric(this->stack.at(0).getType())) &&
145 144 (isNumeric(this->stack.at(1).getType())) &&
146 145 (isNumeric(this->stack.at(2).getType()))) {
... ...
include/qpdf/QPDFTokenizer.hh
... ... @@ -112,6 +112,16 @@ class QPDFTokenizer
112 112 {
113 113 return this->type == tt_integer;
114 114 }
  115 + bool
  116 + isWord() const
  117 + {
  118 + return this->type == tt_word;
  119 + }
  120 + bool
  121 + isWord(std::string const& value) const
  122 + {
  123 + return this->type == tt_word && this->value == value;
  124 + }
115 125  
116 126 private:
117 127 token_type_e type;
... ...
libqpdf/Pl_QPDFTokenizer.cc
... ... @@ -53,9 +53,7 @@ Pl_QPDFTokenizer::finish()
53 53 this->m->filter->handleToken(token);
54 54 if (token.getType() == QPDFTokenizer::tt_eof) {
55 55 break;
56   - } else if (
57   - (token.getType() == QPDFTokenizer::tt_word) &&
58   - (token.getValue() == "ID")) {
  56 + } else if (token.isWord("ID")) {
59 57 // Read the space after the ID.
60 58 char ch = ' ';
61 59 input->read(&ch, 1);
... ...
libqpdf/QPDF.cc
... ... @@ -459,13 +459,11 @@ QPDF::findHeader()
459 459 bool
460 460 QPDF::findStartxref()
461 461 {
462   - QPDFTokenizer::Token t = readToken(this->m->file);
463   - if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "startxref")) {
464   - if (readToken(this->m->file).isInteger()) {
465   - // Position in front of offset token
466   - this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET);
467   - return true;
468   - }
  462 + if (readToken(m->file).isWord("startxref") &&
  463 + readToken(m->file).isInteger()) {
  464 + // Position in front of offset token
  465 + this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET);
  466 + return true;
469 467 }
470 468 return false;
471 469 }
... ... @@ -613,16 +611,13 @@ QPDF::reconstruct_xref(QPDFExc& e)
613 611 // containing this token
614 612 } else if (t1.isInteger()) {
615 613 QPDFTokenizer::Token t2 = readToken(this->m->file, MAX_LEN);
616   - QPDFTokenizer::Token t3 = readToken(this->m->file, MAX_LEN);
617 614 if ((t2.isInteger()) &&
618   - (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"))) {
  615 + (readToken(m->file, MAX_LEN).isWord("obj"))) {
619 616 int obj = QUtil::string_to_int(t1.getValue().c_str());
620 617 int gen = QUtil::string_to_int(t2.getValue().c_str());
621 618 insertXrefEntry(obj, 1, token_start, gen, true);
622 619 }
623   - } else if (
624   - (!this->m->trailer.isInitialized()) &&
625   - (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer"))) {
  620 + } else if (!this->m->trailer.isInitialized() && t1.isWord("trailer")) {
626 621 QPDFObjectHandle t =
627 622 readObject(this->m->file, "trailer", QPDFObjGen(), false);
628 623 if (!t.isDictionary()) {
... ... @@ -922,8 +917,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
922 917 }
923 918 }
924 919 qpdf_offset_t pos = this->m->file->tell();
925   - QPDFTokenizer::Token t = readToken(this->m->file);
926   - if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer")) {
  920 + if (readToken(m->file).isWord("trailer")) {
927 921 done = true;
928 922 } else {
929 923 this->m->file->seek(pos, SEEK_SET);
... ... @@ -1431,8 +1425,7 @@ QPDF::readObject(
1431 1425 } else if (object.isDictionary() && (!in_object_stream)) {
1432 1426 // check for stream
1433 1427 qpdf_offset_t cur_offset = input->tell();
1434   - if (readToken(input) ==
1435   - QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream")) {
  1428 + if (readToken(input).isWord("stream")) {
1436 1429 // The PDF specification states that the word "stream"
1437 1430 // should be followed by either a carriage return and
1438 1431 // a newline or by a newline alone. It specifically
... ... @@ -1523,9 +1516,7 @@ QPDF::readObject(
1523 1516 // Seek in two steps to avoid potential integer overflow
1524 1517 input->seek(stream_offset, SEEK_SET);
1525 1518 input->seek(toO(length), SEEK_CUR);
1526   - if (!(readToken(input) ==
1527   - QPDFTokenizer::Token(
1528   - QPDFTokenizer::tt_word, "endstream"))) {
  1519 + if (!readToken(input).isWord("endstream")) {
1529 1520 QTC::TC("qpdf", "QPDF missing endstream");
1530 1521 throw damagedPDF(
1531 1522 input, input->getLastOffset(), "expected endstream");
... ... @@ -1556,9 +1547,8 @@ bool
1556 1547 QPDF::findEndstream()
1557 1548 {
1558 1549 // Find endstream or endobj. Position the input at that token.
1559   - QPDFTokenizer::Token t = readToken(this->m->file, 20);
1560   - if ((t.getType() == QPDFTokenizer::tt_word) &&
1561   - ((t.getValue() == "endobj") || (t.getValue() == "endstream"))) {
  1550 + auto t = readToken(m->file, 20);
  1551 + if (t.isWord("endobj") || t.isWord("endstream")) {
1562 1552 this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET);
1563 1553 return true;
1564 1554 }
... ... @@ -1682,8 +1672,8 @@ QPDF::readObjectAtOffset(
1682 1672 QPDFTokenizer::Token tobj = readToken(this->m->file);
1683 1673  
1684 1674 bool objidok = tobjid.isInteger();
1685   - int genok = tgen.isInteger();
1686   - int objok = (tobj == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"));
  1675 + bool genok = tgen.isInteger();
  1676 + bool objok = tobj.isWord("obj");
1687 1677  
1688 1678 QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0);
1689 1679 QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0);
... ... @@ -1743,8 +1733,7 @@ QPDF::readObjectAtOffset(
1743 1733  
1744 1734 QPDFObjectHandle oh = readObject(this->m->file, description, og, false);
1745 1735  
1746   - if (!(readToken(this->m->file) ==
1747   - QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))) {
  1736 + if (!readToken(this->m->file).isWord("endobj")) {
1748 1737 QTC::TC("qpdf", "QPDF err expected endobj");
1749 1738 warn(damagedPDF("expected endobj"));
1750 1739 }
... ...
libqpdf/QPDFFormFieldObjectHelper.cc
... ... @@ -556,7 +556,7 @@ ValueSetter::handleToken(QPDFTokenizer::Token const& token)
556 556 switch (state) {
557 557 case st_top:
558 558 writeToken(token);
559   - if ((ttype == QPDFTokenizer::tt_word) && (value == "BMC")) {
  559 + if (token.isWord("BMC")) {
560 560 state = st_bmc;
561 561 }
562 562 break;
... ... @@ -571,7 +571,7 @@ ValueSetter::handleToken(QPDFTokenizer::Token const& token)
571 571 // fall through to emc
572 572  
573 573 case st_emc:
574   - if ((ttype == QPDFTokenizer::tt_word) && (value == "EMC")) {
  574 + if (token.isWord("EMC")) {
575 575 do_replace = true;
576 576 state = st_end;
577 577 }
... ... @@ -751,7 +751,7 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token)
751 751 break;
752 752  
753 753 case QPDFTokenizer::tt_word:
754   - if (value == "Tf") {
  754 + if (token.isWord("Tf")) {
755 755 if ((last_num > 1.0) && (last_num < 1000.0)) {
756 756 // These ranges are arbitrary but keep us from doing
757 757 // insane things or suffering from over/underflow
... ...
libqpdf/QPDFPageObjectHelper.cc
... ... @@ -210,18 +210,16 @@ InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
210 210 writeToken(token);
211 211 state = st_top;
212 212 }
213   - } else if (
214   - token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID")) {
  213 + } else if (token.isWord("ID")) {
215 214 bi_str += token.getValue();
216 215 dict_str += " >>";
217   - } else if (
218   - token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "EI")) {
  216 + } else if (token.isWord("EI")) {
219 217 state = st_top;
220 218 } else {
221 219 bi_str += token.getRawValue();
222 220 dict_str += token.getRawValue();
223 221 }
224   - } else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "BI")) {
  222 + } else if (token.isWord("BI")) {
225 223 bi_str = token.getValue();
226 224 dict_str = "<< ";
227 225 state = st_bi;
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -892,7 +892,7 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
892 892 okay = true;
893 893 } else if (type == tt_bad) {
894 894 found_bad = true;
895   - } else if (type == tt_word) {
  895 + } else if (t.isWord()) {
896 896 // The qpdf tokenizer lumps alphabetic and otherwise
897 897 // uncategorized characters into "words". We recognize
898 898 // strings of alphabetic characters as potential valid
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -121,12 +121,9 @@ QPDF::isLinearized()
121 121 }
122 122  
123 123 QPDFTokenizer::Token t1 = readToken(this->m->file);
124   - QPDFTokenizer::Token t2 = readToken(this->m->file);
125   - QPDFTokenizer::Token t3 = readToken(this->m->file);
126   - QPDFTokenizer::Token t4 = readToken(this->m->file);
127   - if (t1.isInteger() && t2.isInteger() &&
128   - (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")) &&
129   - (t4.getType() == QPDFTokenizer::tt_dict_open)) {
  124 + if (t1.isInteger() && readToken(m->file).isInteger() &&
  125 + readToken(m->file).isWord("obj") &&
  126 + (readToken(m->file).getType() == QPDFTokenizer::tt_dict_open)) {
130 127 lindict_obj = toI(QUtil::string_to_ll(t1.getValue().c_str()));
131 128 }
132 129 }
... ...
libtests/input_source.cc
... ... @@ -37,9 +37,9 @@ Finder::check()
37 37 {
38 38 QPDFTokenizer tokenizer;
39 39 QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
40   - if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "potato")) {
  40 + if (t.isWord("potato")) {
41 41 t = tokenizer.readToken(is, "finder", true);
42   - return (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, after));
  42 + return t.isWord(after);
43 43 }
44 44 return false;
45 45 }
... ...