Commit 34a6f8938f0e6d55eeb9f37c0ef23e02fec88932
Committed by
Jay Berkenbilt
1 parent
dca70f13
Add methods QPDFTokenizer::Token::isWord
Showing
9 changed files
with
39 additions
and
48 deletions
examples/pdf-filter-tokens.cc
| ... | ... | @@ -139,8 +139,7 @@ ColorToGray::handleToken(QPDFTokenizer::Token const& token) |
| 139 | 139 | QPDFTokenizer::token_type_e token_type = token.getType(); |
| 140 | 140 | if (!isIgnorable(token_type)) { |
| 141 | 141 | this->stack.push_back(token); |
| 142 | - if ((this->stack.size() == 4) && | |
| 143 | - (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "rg")) && | |
| 142 | + if ((this->stack.size() == 4) && token.isWord("rg") && | |
| 144 | 143 | (isNumeric(this->stack.at(0).getType())) && |
| 145 | 144 | (isNumeric(this->stack.at(1).getType())) && |
| 146 | 145 | (isNumeric(this->stack.at(2).getType()))) { | ... | ... |
include/qpdf/QPDFTokenizer.hh
| ... | ... | @@ -112,6 +112,16 @@ class QPDFTokenizer |
| 112 | 112 | { |
| 113 | 113 | return this->type == tt_integer; |
| 114 | 114 | } |
| 115 | + bool | |
| 116 | + isWord() const | |
| 117 | + { | |
| 118 | + return this->type == tt_word; | |
| 119 | + } | |
| 120 | + bool | |
| 121 | + isWord(std::string const& value) const | |
| 122 | + { | |
| 123 | + return this->type == tt_word && this->value == value; | |
| 124 | + } | |
| 115 | 125 | |
| 116 | 126 | private: |
| 117 | 127 | token_type_e type; | ... | ... |
libqpdf/Pl_QPDFTokenizer.cc
| ... | ... | @@ -53,9 +53,7 @@ Pl_QPDFTokenizer::finish() |
| 53 | 53 | this->m->filter->handleToken(token); |
| 54 | 54 | if (token.getType() == QPDFTokenizer::tt_eof) { |
| 55 | 55 | break; |
| 56 | - } else if ( | |
| 57 | - (token.getType() == QPDFTokenizer::tt_word) && | |
| 58 | - (token.getValue() == "ID")) { | |
| 56 | + } else if (token.isWord("ID")) { | |
| 59 | 57 | // Read the space after the ID. |
| 60 | 58 | char ch = ' '; |
| 61 | 59 | input->read(&ch, 1); | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -459,13 +459,11 @@ QPDF::findHeader() |
| 459 | 459 | bool |
| 460 | 460 | QPDF::findStartxref() |
| 461 | 461 | { |
| 462 | - QPDFTokenizer::Token t = readToken(this->m->file); | |
| 463 | - if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "startxref")) { | |
| 464 | - if (readToken(this->m->file).isInteger()) { | |
| 465 | - // Position in front of offset token | |
| 466 | - this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET); | |
| 467 | - return true; | |
| 468 | - } | |
| 462 | + if (readToken(m->file).isWord("startxref") && | |
| 463 | + readToken(m->file).isInteger()) { | |
| 464 | + // Position in front of offset token | |
| 465 | + this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET); | |
| 466 | + return true; | |
| 469 | 467 | } |
| 470 | 468 | return false; |
| 471 | 469 | } |
| ... | ... | @@ -613,16 +611,13 @@ QPDF::reconstruct_xref(QPDFExc& e) |
| 613 | 611 | // containing this token |
| 614 | 612 | } else if (t1.isInteger()) { |
| 615 | 613 | QPDFTokenizer::Token t2 = readToken(this->m->file, MAX_LEN); |
| 616 | - QPDFTokenizer::Token t3 = readToken(this->m->file, MAX_LEN); | |
| 617 | 614 | if ((t2.isInteger()) && |
| 618 | - (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"))) { | |
| 615 | + (readToken(m->file, MAX_LEN).isWord("obj"))) { | |
| 619 | 616 | int obj = QUtil::string_to_int(t1.getValue().c_str()); |
| 620 | 617 | int gen = QUtil::string_to_int(t2.getValue().c_str()); |
| 621 | 618 | insertXrefEntry(obj, 1, token_start, gen, true); |
| 622 | 619 | } |
| 623 | - } else if ( | |
| 624 | - (!this->m->trailer.isInitialized()) && | |
| 625 | - (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer"))) { | |
| 620 | + } else if (!this->m->trailer.isInitialized() && t1.isWord("trailer")) { | |
| 626 | 621 | QPDFObjectHandle t = |
| 627 | 622 | readObject(this->m->file, "trailer", QPDFObjGen(), false); |
| 628 | 623 | if (!t.isDictionary()) { |
| ... | ... | @@ -922,8 +917,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) |
| 922 | 917 | } |
| 923 | 918 | } |
| 924 | 919 | qpdf_offset_t pos = this->m->file->tell(); |
| 925 | - QPDFTokenizer::Token t = readToken(this->m->file); | |
| 926 | - if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer")) { | |
| 920 | + if (readToken(m->file).isWord("trailer")) { | |
| 927 | 921 | done = true; |
| 928 | 922 | } else { |
| 929 | 923 | this->m->file->seek(pos, SEEK_SET); |
| ... | ... | @@ -1431,8 +1425,7 @@ QPDF::readObject( |
| 1431 | 1425 | } else if (object.isDictionary() && (!in_object_stream)) { |
| 1432 | 1426 | // check for stream |
| 1433 | 1427 | qpdf_offset_t cur_offset = input->tell(); |
| 1434 | - if (readToken(input) == | |
| 1435 | - QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream")) { | |
| 1428 | + if (readToken(input).isWord("stream")) { | |
| 1436 | 1429 | // The PDF specification states that the word "stream" |
| 1437 | 1430 | // should be followed by either a carriage return and |
| 1438 | 1431 | // a newline or by a newline alone. It specifically |
| ... | ... | @@ -1523,9 +1516,7 @@ QPDF::readObject( |
| 1523 | 1516 | // Seek in two steps to avoid potential integer overflow |
| 1524 | 1517 | input->seek(stream_offset, SEEK_SET); |
| 1525 | 1518 | input->seek(toO(length), SEEK_CUR); |
| 1526 | - if (!(readToken(input) == | |
| 1527 | - QPDFTokenizer::Token( | |
| 1528 | - QPDFTokenizer::tt_word, "endstream"))) { | |
| 1519 | + if (!readToken(input).isWord("endstream")) { | |
| 1529 | 1520 | QTC::TC("qpdf", "QPDF missing endstream"); |
| 1530 | 1521 | throw damagedPDF( |
| 1531 | 1522 | input, input->getLastOffset(), "expected endstream"); |
| ... | ... | @@ -1556,9 +1547,8 @@ bool |
| 1556 | 1547 | QPDF::findEndstream() |
| 1557 | 1548 | { |
| 1558 | 1549 | // Find endstream or endobj. Position the input at that token. |
| 1559 | - QPDFTokenizer::Token t = readToken(this->m->file, 20); | |
| 1560 | - if ((t.getType() == QPDFTokenizer::tt_word) && | |
| 1561 | - ((t.getValue() == "endobj") || (t.getValue() == "endstream"))) { | |
| 1550 | + auto t = readToken(m->file, 20); | |
| 1551 | + if (t.isWord("endobj") || t.isWord("endstream")) { | |
| 1562 | 1552 | this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET); |
| 1563 | 1553 | return true; |
| 1564 | 1554 | } |
| ... | ... | @@ -1682,8 +1672,8 @@ QPDF::readObjectAtOffset( |
| 1682 | 1672 | QPDFTokenizer::Token tobj = readToken(this->m->file); |
| 1683 | 1673 | |
| 1684 | 1674 | bool objidok = tobjid.isInteger(); |
| 1685 | - int genok = tgen.isInteger(); | |
| 1686 | - int objok = (tobj == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")); | |
| 1675 | + bool genok = tgen.isInteger(); | |
| 1676 | + bool objok = tobj.isWord("obj"); | |
| 1687 | 1677 | |
| 1688 | 1678 | QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0); |
| 1689 | 1679 | QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0); |
| ... | ... | @@ -1743,8 +1733,7 @@ QPDF::readObjectAtOffset( |
| 1743 | 1733 | |
| 1744 | 1734 | QPDFObjectHandle oh = readObject(this->m->file, description, og, false); |
| 1745 | 1735 | |
| 1746 | - if (!(readToken(this->m->file) == | |
| 1747 | - QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))) { | |
| 1736 | + if (!readToken(this->m->file).isWord("endobj")) { | |
| 1748 | 1737 | QTC::TC("qpdf", "QPDF err expected endobj"); |
| 1749 | 1738 | warn(damagedPDF("expected endobj")); |
| 1750 | 1739 | } | ... | ... |
libqpdf/QPDFFormFieldObjectHelper.cc
| ... | ... | @@ -556,7 +556,7 @@ ValueSetter::handleToken(QPDFTokenizer::Token const& token) |
| 556 | 556 | switch (state) { |
| 557 | 557 | case st_top: |
| 558 | 558 | writeToken(token); |
| 559 | - if ((ttype == QPDFTokenizer::tt_word) && (value == "BMC")) { | |
| 559 | + if (token.isWord("BMC")) { | |
| 560 | 560 | state = st_bmc; |
| 561 | 561 | } |
| 562 | 562 | break; |
| ... | ... | @@ -571,7 +571,7 @@ ValueSetter::handleToken(QPDFTokenizer::Token const& token) |
| 571 | 571 | // fall through to emc |
| 572 | 572 | |
| 573 | 573 | case st_emc: |
| 574 | - if ((ttype == QPDFTokenizer::tt_word) && (value == "EMC")) { | |
| 574 | + if (token.isWord("EMC")) { | |
| 575 | 575 | do_replace = true; |
| 576 | 576 | state = st_end; |
| 577 | 577 | } |
| ... | ... | @@ -751,7 +751,7 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token) |
| 751 | 751 | break; |
| 752 | 752 | |
| 753 | 753 | case QPDFTokenizer::tt_word: |
| 754 | - if (value == "Tf") { | |
| 754 | + if (token.isWord("Tf")) { | |
| 755 | 755 | if ((last_num > 1.0) && (last_num < 1000.0)) { |
| 756 | 756 | // These ranges are arbitrary but keep us from doing |
| 757 | 757 | // insane things or suffering from over/underflow | ... | ... |
libqpdf/QPDFPageObjectHelper.cc
| ... | ... | @@ -210,18 +210,16 @@ InlineImageTracker::handleToken(QPDFTokenizer::Token const& token) |
| 210 | 210 | writeToken(token); |
| 211 | 211 | state = st_top; |
| 212 | 212 | } |
| 213 | - } else if ( | |
| 214 | - token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID")) { | |
| 213 | + } else if (token.isWord("ID")) { | |
| 215 | 214 | bi_str += token.getValue(); |
| 216 | 215 | dict_str += " >>"; |
| 217 | - } else if ( | |
| 218 | - token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "EI")) { | |
| 216 | + } else if (token.isWord("EI")) { | |
| 219 | 217 | state = st_top; |
| 220 | 218 | } else { |
| 221 | 219 | bi_str += token.getRawValue(); |
| 222 | 220 | dict_str += token.getRawValue(); |
| 223 | 221 | } |
| 224 | - } else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "BI")) { | |
| 222 | + } else if (token.isWord("BI")) { | |
| 225 | 223 | bi_str = token.getValue(); |
| 226 | 224 | dict_str = "<< "; |
| 227 | 225 | state = st_bi; | ... | ... |
libqpdf/QPDFTokenizer.cc
| ... | ... | @@ -892,7 +892,7 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input) |
| 892 | 892 | okay = true; |
| 893 | 893 | } else if (type == tt_bad) { |
| 894 | 894 | found_bad = true; |
| 895 | - } else if (type == tt_word) { | |
| 895 | + } else if (t.isWord()) { | |
| 896 | 896 | // The qpdf tokenizer lumps alphabetic and otherwise |
| 897 | 897 | // uncategorized characters into "words". We recognize |
| 898 | 898 | // strings of alphabetic characters as potential valid | ... | ... |
libqpdf/QPDF_linearization.cc
| ... | ... | @@ -121,12 +121,9 @@ QPDF::isLinearized() |
| 121 | 121 | } |
| 122 | 122 | |
| 123 | 123 | QPDFTokenizer::Token t1 = readToken(this->m->file); |
| 124 | - QPDFTokenizer::Token t2 = readToken(this->m->file); | |
| 125 | - QPDFTokenizer::Token t3 = readToken(this->m->file); | |
| 126 | - QPDFTokenizer::Token t4 = readToken(this->m->file); | |
| 127 | - if (t1.isInteger() && t2.isInteger() && | |
| 128 | - (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")) && | |
| 129 | - (t4.getType() == QPDFTokenizer::tt_dict_open)) { | |
| 124 | + if (t1.isInteger() && readToken(m->file).isInteger() && | |
| 125 | + readToken(m->file).isWord("obj") && | |
| 126 | + (readToken(m->file).getType() == QPDFTokenizer::tt_dict_open)) { | |
| 130 | 127 | lindict_obj = toI(QUtil::string_to_ll(t1.getValue().c_str())); |
| 131 | 128 | } |
| 132 | 129 | } | ... | ... |
libtests/input_source.cc
| ... | ... | @@ -37,9 +37,9 @@ Finder::check() |
| 37 | 37 | { |
| 38 | 38 | QPDFTokenizer tokenizer; |
| 39 | 39 | QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true); |
| 40 | - if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "potato")) { | |
| 40 | + if (t.isWord("potato")) { | |
| 41 | 41 | t = tokenizer.readToken(is, "finder", true); |
| 42 | - return (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, after)); | |
| 42 | + return t.isWord(after); | |
| 43 | 43 | } |
| 44 | 44 | return false; |
| 45 | 45 | } | ... | ... |