Commit 6670c685ab9f929121c5498115b278c95574e461
1 parent
0adfd74f
Move QPDFObjectHandle::parseInternal to new class QPDFParser
Part of #729
Showing 7 changed files with 576 additions and 522 deletions
include/qpdf/QPDF.hh
| ... | ... | @@ -49,6 +49,7 @@ class QPDF_Stream; |
| 49 | 49 | class BitStream; |
| 50 | 50 | class BitWriter; |
| 51 | 51 | class QPDFLogger; |
| 52 | +class QPDFParser; | |
| 52 | 53 | |
| 53 | 54 | class QPDF |
| 54 | 55 | { |
| ... | ... | @@ -881,7 +882,7 @@ class QPDF |
| 881 | 882 | // resolution |
| 882 | 883 | class ParseGuard |
| 883 | 884 | { |
| 884 | - friend class QPDFObjectHandle; | |
| 885 | + friend class QPDFParser; | |
| 885 | 886 | |
| 886 | 887 | private: |
| 887 | 888 | ParseGuard(QPDF* qpdf) : | ... | ... |
include/qpdf/QPDFObjectHandle.hh
| ... | ... | @@ -49,9 +49,12 @@ class QPDFTokenizer; |
| 49 | 49 | class QPDFExc; |
| 50 | 50 | class Pl_QPDFTokenizer; |
| 51 | 51 | class QPDFMatrix; |
| 52 | +class QPDFParser; | |
| 52 | 53 | |
| 53 | 54 | class QPDFObjectHandle |
| 54 | 55 | { |
| 56 | + friend class QPDFParser; | |
| 57 | + | |
| 55 | 58 | public: |
| 56 | 59 | // This class is used by replaceStreamData. It provides an |
| 57 | 60 | // alternative way of associating stream data with a stream. See |
| ... | ... | @@ -1563,15 +1566,6 @@ class QPDFObjectHandle |
| 1563 | 1566 | QPDFObjectHandle(QPDF*, QPDFObjGen const& og); |
| 1564 | 1567 | QPDFObjectHandle(std::shared_ptr<QPDFObject> const&); |
| 1565 | 1568 | |
| 1566 | - enum parser_state_e { | |
| 1567 | - st_top, | |
| 1568 | - st_start, | |
| 1569 | - st_stop, | |
| 1570 | - st_eof, | |
| 1571 | - st_dictionary, | |
| 1572 | - st_array | |
| 1573 | - }; | |
| 1574 | - | |
| 1575 | 1569 | // Private object factory methods |
| 1576 | 1570 | static QPDFObjectHandle newIndirect(QPDF*, QPDFObjGen const& og); |
| 1577 | 1571 | static QPDFObjectHandle newStream( |
| ... | ... | @@ -1599,14 +1593,7 @@ class QPDFObjectHandle |
| 1599 | 1593 | std::string const&, |
| 1600 | 1594 | std::shared_ptr<InputSource>, |
| 1601 | 1595 | qpdf_offset_t); |
| 1602 | - static QPDFObjectHandle parseInternal( | |
| 1603 | - std::shared_ptr<InputSource> input, | |
| 1604 | - std::string const& object_description, | |
| 1605 | - QPDFTokenizer& tokenizer, | |
| 1606 | - bool& empty, | |
| 1607 | - StringDecrypter* decrypter, | |
| 1608 | - QPDF* context, | |
| 1609 | - bool content_stream); | |
| 1596 | + | |
| 1610 | 1597 | void setParsedOffset(qpdf_offset_t offset); |
| 1611 | 1598 | void parseContentStream_internal( |
| 1612 | 1599 | std::string const& description, ParserCallbacks* callbacks); | ... | ... |
libqpdf/CMakeLists.txt
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -8,6 +8,7 @@ |
| 8 | 8 | #include <qpdf/QPDFLogger.hh> |
| 9 | 9 | #include <qpdf/QPDFMatrix.hh> |
| 10 | 10 | #include <qpdf/QPDFPageObjectHelper.hh> |
| 11 | +#include <qpdf/QPDFParser.hh> | |
| 11 | 12 | #include <qpdf/QPDF_Array.hh> |
| 12 | 13 | #include <qpdf/QPDF_Bool.hh> |
| 13 | 14 | #include <qpdf/QPDF_Dictionary.hh> |
| ... | ... | @@ -1879,8 +1880,8 @@ QPDFObjectHandle::parseContentStream_data( |
| 1879 | 1880 | tokenizer.readToken(input, "content", true); |
| 1880 | 1881 | qpdf_offset_t offset = input->getLastOffset(); |
| 1881 | 1882 | input->seek(offset, SEEK_SET); |
| 1882 | - QPDFObjectHandle obj = parseInternal( | |
| 1883 | - input, "content", tokenizer, empty, nullptr, context, true); | |
| 1883 | + auto obj = QPDFParser(input, "content", tokenizer, nullptr, context) | |
| 1884 | + .parse(empty, true); | |
| 1884 | 1885 | if (!obj.isInitialized()) { |
| 1885 | 1886 | // EOF |
| 1886 | 1887 | break; |
| ... | ... | @@ -1943,497 +1944,8 @@ QPDFObjectHandle::parse( |
| 1943 | 1944 | StringDecrypter* decrypter, |
| 1944 | 1945 | QPDF* context) |
| 1945 | 1946 | { |
| 1946 | - return parseInternal( | |
| 1947 | - input, object_description, tokenizer, empty, decrypter, context, false); | |
| 1948 | -} | |
| 1949 | - | |
| 1950 | -QPDFObjectHandle | |
| 1951 | -QPDFObjectHandle::parseInternal( | |
| 1952 | - std::shared_ptr<InputSource> input, | |
| 1953 | - std::string const& object_description, | |
| 1954 | - QPDFTokenizer& tokenizer, | |
| 1955 | - bool& empty, | |
| 1956 | - StringDecrypter* decrypter, | |
| 1957 | - QPDF* context, | |
| 1958 | - bool content_stream) | |
| 1959 | -{ | |
| 1960 | - // This method must take care not to resolve any objects. Don't | |
| 1961 | - // check the type of any object without first ensuring that it is | |
| 1962 | - // a direct object. Otherwise, doing so may have the side effect | |
| 1963 | - // of reading the object and changing the file pointer. If you do | |
| 1964 | - // this, it will cause a logic error to be thrown from | |
| 1965 | - // QPDF::inParse(). | |
| 1966 | - | |
| 1967 | - QPDF::ParseGuard pg(context); | |
| 1968 | - | |
| 1969 | - empty = false; | |
| 1970 | - | |
| 1971 | - QPDFObjectHandle object; | |
| 1972 | - bool set_offset = false; | |
| 1973 | - | |
| 1974 | - std::vector<SparseOHArray> olist_stack; | |
| 1975 | - olist_stack.push_back(SparseOHArray()); | |
| 1976 | - std::vector<parser_state_e> state_stack; | |
| 1977 | - state_stack.push_back(st_top); | |
| 1978 | - std::vector<qpdf_offset_t> offset_stack; | |
| 1979 | - qpdf_offset_t offset = input->tell(); | |
| 1980 | - offset_stack.push_back(offset); | |
| 1981 | - bool done = false; | |
| 1982 | - int bad_count = 0; | |
| 1983 | - int good_count = 0; | |
| 1984 | - bool b_contents = false; | |
| 1985 | - std::vector<std::string> contents_string_stack; | |
| 1986 | - contents_string_stack.push_back(""); | |
| 1987 | - std::vector<qpdf_offset_t> contents_offset_stack; | |
| 1988 | - contents_offset_stack.push_back(-1); | |
| 1989 | - while (!done) { | |
| 1990 | - bool bad = false; | |
| 1991 | - SparseOHArray& olist = olist_stack.back(); | |
| 1992 | - parser_state_e state = state_stack.back(); | |
| 1993 | - offset = offset_stack.back(); | |
| 1994 | - std::string& contents_string = contents_string_stack.back(); | |
| 1995 | - qpdf_offset_t& contents_offset = contents_offset_stack.back(); | |
| 1996 | - | |
| 1997 | - object = QPDFObjectHandle(); | |
| 1998 | - set_offset = false; | |
| 1999 | - | |
| 2000 | - QPDFTokenizer::Token token = | |
| 2001 | - tokenizer.readToken(input, object_description, true); | |
| 2002 | - std::string const& token_error_message = token.getErrorMessage(); | |
| 2003 | - if (!token_error_message.empty()) { | |
| 2004 | - // Tokens other than tt_bad can still generate warnings. | |
| 2005 | - warn( | |
| 2006 | - context, | |
| 2007 | - QPDFExc( | |
| 2008 | - qpdf_e_damaged_pdf, | |
| 2009 | - input->getName(), | |
| 2010 | - object_description, | |
| 2011 | - input->getLastOffset(), | |
| 2012 | - token_error_message)); | |
| 2013 | - } | |
| 2014 | - | |
| 2015 | - switch (token.getType()) { | |
| 2016 | - case QPDFTokenizer::tt_eof: | |
| 2017 | - if (!content_stream) { | |
| 2018 | - QTC::TC("qpdf", "QPDFObjectHandle eof in parseInternal"); | |
| 2019 | - warn( | |
| 2020 | - context, | |
| 2021 | - QPDFExc( | |
| 2022 | - qpdf_e_damaged_pdf, | |
| 2023 | - input->getName(), | |
| 2024 | - object_description, | |
| 2025 | - input->getLastOffset(), | |
| 2026 | - "unexpected EOF")); | |
| 2027 | - } | |
| 2028 | - bad = true; | |
| 2029 | - state = st_eof; | |
| 2030 | - break; | |
| 2031 | - | |
| 2032 | - case QPDFTokenizer::tt_bad: | |
| 2033 | - QTC::TC("qpdf", "QPDFObjectHandle bad token in parse"); | |
| 2034 | - bad = true; | |
| 2035 | - object = newNull(); | |
| 2036 | - break; | |
| 2037 | - | |
| 2038 | - case QPDFTokenizer::tt_brace_open: | |
| 2039 | - case QPDFTokenizer::tt_brace_close: | |
| 2040 | - QTC::TC("qpdf", "QPDFObjectHandle bad brace"); | |
| 2041 | - warn( | |
| 2042 | - context, | |
| 2043 | - QPDFExc( | |
| 2044 | - qpdf_e_damaged_pdf, | |
| 2045 | - input->getName(), | |
| 2046 | - object_description, | |
| 2047 | - input->getLastOffset(), | |
| 2048 | - "treating unexpected brace token as null")); | |
| 2049 | - bad = true; | |
| 2050 | - object = newNull(); | |
| 2051 | - break; | |
| 2052 | - | |
| 2053 | - case QPDFTokenizer::tt_array_close: | |
| 2054 | - if (state == st_array) { | |
| 2055 | - state = st_stop; | |
| 2056 | - } else { | |
| 2057 | - QTC::TC("qpdf", "QPDFObjectHandle bad array close"); | |
| 2058 | - warn( | |
| 2059 | - context, | |
| 2060 | - QPDFExc( | |
| 2061 | - qpdf_e_damaged_pdf, | |
| 2062 | - input->getName(), | |
| 2063 | - object_description, | |
| 2064 | - input->getLastOffset(), | |
| 2065 | - "treating unexpected array close token as null")); | |
| 2066 | - bad = true; | |
| 2067 | - object = newNull(); | |
| 2068 | - } | |
| 2069 | - break; | |
| 2070 | - | |
| 2071 | - case QPDFTokenizer::tt_dict_close: | |
| 2072 | - if (state == st_dictionary) { | |
| 2073 | - state = st_stop; | |
| 2074 | - } else { | |
| 2075 | - QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close"); | |
| 2076 | - warn( | |
| 2077 | - context, | |
| 2078 | - QPDFExc( | |
| 2079 | - qpdf_e_damaged_pdf, | |
| 2080 | - input->getName(), | |
| 2081 | - object_description, | |
| 2082 | - input->getLastOffset(), | |
| 2083 | - "unexpected dictionary close token")); | |
| 2084 | - bad = true; | |
| 2085 | - object = newNull(); | |
| 2086 | - } | |
| 2087 | - break; | |
| 2088 | - | |
| 2089 | - case QPDFTokenizer::tt_array_open: | |
| 2090 | - case QPDFTokenizer::tt_dict_open: | |
| 2091 | - if (olist_stack.size() > 500) { | |
| 2092 | - QTC::TC("qpdf", "QPDFObjectHandle too deep"); | |
| 2093 | - warn( | |
| 2094 | - context, | |
| 2095 | - QPDFExc( | |
| 2096 | - qpdf_e_damaged_pdf, | |
| 2097 | - input->getName(), | |
| 2098 | - object_description, | |
| 2099 | - input->getLastOffset(), | |
| 2100 | - "ignoring excessively deeply nested data structure")); | |
| 2101 | - bad = true; | |
| 2102 | - object = newNull(); | |
| 2103 | - state = st_top; | |
| 2104 | - } else { | |
| 2105 | - olist_stack.push_back(SparseOHArray()); | |
| 2106 | - state = st_start; | |
| 2107 | - offset_stack.push_back(input->tell()); | |
| 2108 | - state_stack.push_back( | |
| 2109 | - (token.getType() == QPDFTokenizer::tt_array_open) | |
| 2110 | - ? st_array | |
| 2111 | - : st_dictionary); | |
| 2112 | - b_contents = false; | |
| 2113 | - contents_string_stack.push_back(""); | |
| 2114 | - contents_offset_stack.push_back(-1); | |
| 2115 | - } | |
| 2116 | - break; | |
| 2117 | - | |
| 2118 | - case QPDFTokenizer::tt_bool: | |
| 2119 | - object = newBool((token.getValue() == "true")); | |
| 2120 | - break; | |
| 2121 | - | |
| 2122 | - case QPDFTokenizer::tt_null: | |
| 2123 | - object = newNull(); | |
| 2124 | - break; | |
| 2125 | - | |
| 2126 | - case QPDFTokenizer::tt_integer: | |
| 2127 | - object = newInteger(QUtil::string_to_ll(token.getValue().c_str())); | |
| 2128 | - break; | |
| 2129 | - | |
| 2130 | - case QPDFTokenizer::tt_real: | |
| 2131 | - object = newReal(token.getValue()); | |
| 2132 | - break; | |
| 2133 | - | |
| 2134 | - case QPDFTokenizer::tt_name: | |
| 2135 | - { | |
| 2136 | - std::string name = token.getValue(); | |
| 2137 | - object = newName(name); | |
| 2138 | - | |
| 2139 | - if (name == "/Contents") { | |
| 2140 | - b_contents = true; | |
| 2141 | - } else { | |
| 2142 | - b_contents = false; | |
| 2143 | - } | |
| 2144 | - } | |
| 2145 | - break; | |
| 2146 | - | |
| 2147 | - case QPDFTokenizer::tt_word: | |
| 2148 | - { | |
| 2149 | - std::string const& value = token.getValue(); | |
| 2150 | - if (content_stream) { | |
| 2151 | - object = QPDFObjectHandle::newOperator(value); | |
| 2152 | - } else if ( | |
| 2153 | - (value == "R") && (state != st_top) && | |
| 2154 | - (olist.size() >= 2) && | |
| 2155 | - (!olist.at(olist.size() - 1).isIndirect()) && | |
| 2156 | - (olist.at(olist.size() - 1).isInteger()) && | |
| 2157 | - (!olist.at(olist.size() - 2).isIndirect()) && | |
| 2158 | - (olist.at(olist.size() - 2).isInteger())) { | |
| 2159 | - if (context == nullptr) { | |
| 2160 | - QTC::TC( | |
| 2161 | - "qpdf", | |
| 2162 | - "QPDFObjectHandle indirect without context"); | |
| 2163 | - throw std::logic_error( | |
| 2164 | - "QPDFObjectHandle::parse called without context" | |
| 2165 | - " on an object with indirect references"); | |
| 2166 | - } | |
| 2167 | - // Try to resolve indirect objects | |
| 2168 | - object = newIndirect( | |
| 2169 | - context, | |
| 2170 | - QPDFObjGen( | |
| 2171 | - olist.at(olist.size() - 2).getIntValueAsInt(), | |
| 2172 | - olist.at(olist.size() - 1).getIntValueAsInt())); | |
| 2173 | - olist.remove_last(); | |
| 2174 | - olist.remove_last(); | |
| 2175 | - } else if ((value == "endobj") && (state == st_top)) { | |
| 2176 | - // We just saw endobj without having read | |
| 2177 | - // anything. Treat this as a null and do not move | |
| 2178 | - // the input source's offset. | |
| 2179 | - object = newNull(); | |
| 2180 | - input->seek(input->getLastOffset(), SEEK_SET); | |
| 2181 | - empty = true; | |
| 2182 | - } else { | |
| 2183 | - QTC::TC("qpdf", "QPDFObjectHandle treat word as string"); | |
| 2184 | - warn( | |
| 2185 | - context, | |
| 2186 | - QPDFExc( | |
| 2187 | - qpdf_e_damaged_pdf, | |
| 2188 | - input->getName(), | |
| 2189 | - object_description, | |
| 2190 | - input->getLastOffset(), | |
| 2191 | - "unknown token while reading object;" | |
| 2192 | - " treating as string")); | |
| 2193 | - bad = true; | |
| 2194 | - object = newString(value); | |
| 2195 | - } | |
| 2196 | - } | |
| 2197 | - break; | |
| 2198 | - | |
| 2199 | - case QPDFTokenizer::tt_string: | |
| 2200 | - { | |
| 2201 | - std::string val = token.getValue(); | |
| 2202 | - if (decrypter) { | |
| 2203 | - if (b_contents) { | |
| 2204 | - contents_string = val; | |
| 2205 | - contents_offset = input->getLastOffset(); | |
| 2206 | - b_contents = false; | |
| 2207 | - } | |
| 2208 | - decrypter->decryptString(val); | |
| 2209 | - } | |
| 2210 | - object = QPDFObjectHandle::newString(val); | |
| 2211 | - } | |
| 2212 | - | |
| 2213 | - break; | |
| 2214 | - | |
| 2215 | - default: | |
| 2216 | - warn( | |
| 2217 | - context, | |
| 2218 | - QPDFExc( | |
| 2219 | - qpdf_e_damaged_pdf, | |
| 2220 | - input->getName(), | |
| 2221 | - object_description, | |
| 2222 | - input->getLastOffset(), | |
| 2223 | - "treating unknown token type as null while " | |
| 2224 | - "reading object")); | |
| 2225 | - bad = true; | |
| 2226 | - object = newNull(); | |
| 2227 | - break; | |
| 2228 | - } | |
| 2229 | - | |
| 2230 | - if ((!object.isInitialized()) && | |
| 2231 | - (!((state == st_start) || (state == st_stop) || | |
| 2232 | - (state == st_eof)))) { | |
| 2233 | - throw std::logic_error("QPDFObjectHandle::parseInternal: " | |
| 2234 | - "unexpected uninitialized object"); | |
| 2235 | - object = newNull(); | |
| 2236 | - } | |
| 2237 | - | |
| 2238 | - if (bad) { | |
| 2239 | - ++bad_count; | |
| 2240 | - good_count = 0; | |
| 2241 | - } else { | |
| 2242 | - ++good_count; | |
| 2243 | - if (good_count > 3) { | |
| 2244 | - bad_count = 0; | |
| 2245 | - } | |
| 2246 | - } | |
| 2247 | - if (bad_count > 5) { | |
| 2248 | - // We had too many consecutive errors without enough | |
| 2249 | - // intervening successful objects. Give up. | |
| 2250 | - warn( | |
| 2251 | - context, | |
| 2252 | - QPDFExc( | |
| 2253 | - qpdf_e_damaged_pdf, | |
| 2254 | - input->getName(), | |
| 2255 | - object_description, | |
| 2256 | - input->getLastOffset(), | |
| 2257 | - "too many errors; giving up on reading object")); | |
| 2258 | - state = st_top; | |
| 2259 | - object = newNull(); | |
| 2260 | - } | |
| 2261 | - | |
| 2262 | - switch (state) { | |
| 2263 | - case st_eof: | |
| 2264 | - if (state_stack.size() > 1) { | |
| 2265 | - warn( | |
| 2266 | - context, | |
| 2267 | - QPDFExc( | |
| 2268 | - qpdf_e_damaged_pdf, | |
| 2269 | - input->getName(), | |
| 2270 | - object_description, | |
| 2271 | - input->getLastOffset(), | |
| 2272 | - "parse error while reading object")); | |
| 2273 | - } | |
| 2274 | - done = true; | |
| 2275 | - // In content stream mode, leave object uninitialized to | |
| 2276 | - // indicate EOF | |
| 2277 | - if (!content_stream) { | |
| 2278 | - object = newNull(); | |
| 2279 | - } | |
| 2280 | - break; | |
| 2281 | - | |
| 2282 | - case st_dictionary: | |
| 2283 | - case st_array: | |
| 2284 | - setObjectDescriptionFromInput( | |
| 2285 | - object, | |
| 2286 | - context, | |
| 2287 | - object_description, | |
| 2288 | - input, | |
| 2289 | - input->getLastOffset()); | |
| 2290 | - object.setParsedOffset(input->getLastOffset()); | |
| 2291 | - set_offset = true; | |
| 2292 | - olist.append(object); | |
| 2293 | - break; | |
| 2294 | - | |
| 2295 | - case st_top: | |
| 2296 | - done = true; | |
| 2297 | - break; | |
| 2298 | - | |
| 2299 | - case st_start: | |
| 2300 | - break; | |
| 2301 | - | |
| 2302 | - case st_stop: | |
| 2303 | - if ((state_stack.size() < 2) || (olist_stack.size() < 2)) { | |
| 2304 | - throw std::logic_error( | |
| 2305 | - "QPDFObjectHandle::parseInternal: st_stop encountered" | |
| 2306 | - " with insufficient elements in stack"); | |
| 2307 | - } | |
| 2308 | - parser_state_e old_state = state_stack.back(); | |
| 2309 | - state_stack.pop_back(); | |
| 2310 | - if (old_state == st_array) { | |
| 2311 | - // There's no newArray(SparseOHArray) since | |
| 2312 | - // SparseOHArray is not part of the public API. | |
| 2313 | - object = QPDFObjectHandle(QPDF_Array::create(olist)); | |
| 2314 | - setObjectDescriptionFromInput( | |
| 2315 | - object, context, object_description, input, offset); | |
| 2316 | - // The `offset` points to the next of "[". Set the | |
| 2317 | - // rewind offset to point to the beginning of "[". | |
| 2318 | - // This has been explicitly tested with whitespace | |
| 2319 | - // surrounding the array start delimiter. | |
| 2320 | - // getLastOffset points to the array end token and | |
| 2321 | - // therefore can't be used here. | |
| 2322 | - object.setParsedOffset(offset - 1); | |
| 2323 | - set_offset = true; | |
| 2324 | - } else if (old_state == st_dictionary) { | |
| 2325 | - // Convert list to map. Alternating elements are keys. | |
| 2326 | - // Attempt to recover more or less gracefully from | |
| 2327 | - // invalid dictionaries. | |
| 2328 | - std::set<std::string> names; | |
| 2329 | - size_t n_elements = olist.size(); | |
| 2330 | - for (size_t i = 0; i < n_elements; ++i) { | |
| 2331 | - QPDFObjectHandle oh = olist.at(i); | |
| 2332 | - if ((!oh.isIndirect()) && oh.isName()) { | |
| 2333 | - names.insert(oh.getName()); | |
| 2334 | - } | |
| 2335 | - } | |
| 2336 | - | |
| 2337 | - std::map<std::string, QPDFObjectHandle> dict; | |
| 2338 | - int next_fake_key = 1; | |
| 2339 | - for (unsigned int i = 0; i < olist.size(); ++i) { | |
| 2340 | - QPDFObjectHandle key_obj = olist.at(i); | |
| 2341 | - QPDFObjectHandle val; | |
| 2342 | - if (key_obj.isIndirect() || (!key_obj.isName())) { | |
| 2343 | - bool found_fake = false; | |
| 2344 | - std::string candidate; | |
| 2345 | - while (!found_fake) { | |
| 2346 | - candidate = "/QPDFFake" + | |
| 2347 | - QUtil::int_to_string(next_fake_key++); | |
| 2348 | - found_fake = (names.count(candidate) == 0); | |
| 2349 | - QTC::TC( | |
| 2350 | - "qpdf", | |
| 2351 | - "QPDFObjectHandle found fake", | |
| 2352 | - (found_fake ? 0 : 1)); | |
| 2353 | - } | |
| 2354 | - warn( | |
| 2355 | - context, | |
| 2356 | - QPDFExc( | |
| 2357 | - qpdf_e_damaged_pdf, | |
| 2358 | - input->getName(), | |
| 2359 | - object_description, | |
| 2360 | - offset, | |
| 2361 | - "expected dictionary key but found" | |
| 2362 | - " non-name object; inserting key " + | |
| 2363 | - candidate)); | |
| 2364 | - val = key_obj; | |
| 2365 | - key_obj = newName(candidate); | |
| 2366 | - } else if (i + 1 >= olist.size()) { | |
| 2367 | - QTC::TC("qpdf", "QPDFObjectHandle no val for last key"); | |
| 2368 | - warn( | |
| 2369 | - context, | |
| 2370 | - QPDFExc( | |
| 2371 | - qpdf_e_damaged_pdf, | |
| 2372 | - input->getName(), | |
| 2373 | - object_description, | |
| 2374 | - offset, | |
| 2375 | - "dictionary ended prematurely; " | |
| 2376 | - "using null as value for last key")); | |
| 2377 | - val = newNull(); | |
| 2378 | - setObjectDescriptionFromInput( | |
| 2379 | - val, context, object_description, input, offset); | |
| 2380 | - } else { | |
| 2381 | - val = olist.at(++i); | |
| 2382 | - } | |
| 2383 | - std::string key = key_obj.getName(); | |
| 2384 | - if (dict.count(key) > 0) { | |
| 2385 | - QTC::TC("qpdf", "QPDFObjectHandle duplicate dict key"); | |
| 2386 | - warn( | |
| 2387 | - context, | |
| 2388 | - QPDFExc( | |
| 2389 | - qpdf_e_damaged_pdf, | |
| 2390 | - input->getName(), | |
| 2391 | - object_description, | |
| 2392 | - offset, | |
| 2393 | - "dictionary has duplicated key " + key + | |
| 2394 | - "; last occurrence overrides earlier " | |
| 2395 | - "ones")); | |
| 2396 | - } | |
| 2397 | - dict[key] = val; | |
| 2398 | - } | |
| 2399 | - if (!contents_string.empty() && dict.count("/Type") && | |
| 2400 | - dict["/Type"].isNameAndEquals("/Sig") && | |
| 2401 | - dict.count("/ByteRange") && dict.count("/Contents") && | |
| 2402 | - dict["/Contents"].isString()) { | |
| 2403 | - dict["/Contents"] = | |
| 2404 | - QPDFObjectHandle::newString(contents_string); | |
| 2405 | - dict["/Contents"].setParsedOffset(contents_offset); | |
| 2406 | - } | |
| 2407 | - object = newDictionary(dict); | |
| 2408 | - setObjectDescriptionFromInput( | |
| 2409 | - object, context, object_description, input, offset); | |
| 2410 | - // The `offset` points to the next of "<<". Set the | |
| 2411 | - // rewind offset to point to the beginning of "<<". | |
| 2412 | - // This has been explicitly tested with whitespace | |
| 2413 | - // surrounding the dictionary start delimiter. | |
| 2414 | - // getLastOffset points to the dictionary end token | |
| 2415 | - // and therefore can't be used here. | |
| 2416 | - object.setParsedOffset(offset - 2); | |
| 2417 | - set_offset = true; | |
| 2418 | - } | |
| 2419 | - olist_stack.pop_back(); | |
| 2420 | - offset_stack.pop_back(); | |
| 2421 | - if (state_stack.back() == st_top) { | |
| 2422 | - done = true; | |
| 2423 | - } else { | |
| 2424 | - olist_stack.back().append(object); | |
| 2425 | - } | |
| 2426 | - contents_string_stack.pop_back(); | |
| 2427 | - contents_offset_stack.pop_back(); | |
| 2428 | - } | |
| 2429 | - } | |
| 2430 | - | |
| 2431 | - if (!set_offset) { | |
| 2432 | - setObjectDescriptionFromInput( | |
| 2433 | - object, context, object_description, input, offset); | |
| 2434 | - object.setParsedOffset(offset); | |
| 2435 | - } | |
| 2436 | - return object; | |
| 1947 | + return QPDFParser(input, object_description, tokenizer, decrypter, context) | |
| 1948 | + .parse(empty, false); | |
| 2437 | 1949 | } |
| 2438 | 1950 | |
| 2439 | 1951 | qpdf_offset_t | ... | ... |
libqpdf/QPDFParser.cc
0 → 100644
| 1 | +#include <qpdf/QPDFParser.hh> | |
| 2 | + | |
| 3 | +#include <qpdf/QPDF.hh> | |
| 4 | +#include <qpdf/QPDFObjectHandle.hh> | |
| 5 | +#include <qpdf/QPDF_Array.hh> | |
| 6 | +#include <qpdf/QTC.hh> | |
| 7 | +#include <qpdf/QUtil.hh> | |
| 8 | +#include <qpdf/SparseOHArray.hh> | |
| 9 | + | |
| 10 | +QPDFObjectHandle | |
| 11 | +QPDFParser::parse(bool& empty, bool content_stream) | |
| 12 | +{ | |
| 13 | + // This method must take care not to resolve any objects. Don't | |
| 14 | + // check the type of any object without first ensuring that it is | |
| 15 | + // a direct object. Otherwise, doing so may have the side effect | |
| 16 | + // of reading the object and changing the file pointer. If you do | |
| 17 | + // this, it will cause a logic error to be thrown from | |
| 18 | + // QPDF::inParse(). | |
| 19 | + | |
| 20 | + QPDF::ParseGuard pg(context); | |
| 21 | + | |
| 22 | + empty = false; | |
| 23 | + | |
| 24 | + QPDFObjectHandle object; | |
| 25 | + bool set_offset = false; | |
| 26 | + | |
| 27 | + std::vector<SparseOHArray> olist_stack; | |
| 28 | + olist_stack.push_back(SparseOHArray()); | |
| 29 | + std::vector<parser_state_e> state_stack; | |
| 30 | + state_stack.push_back(st_top); | |
| 31 | + std::vector<qpdf_offset_t> offset_stack; | |
| 32 | + qpdf_offset_t offset = input->tell(); | |
| 33 | + offset_stack.push_back(offset); | |
| 34 | + bool done = false; | |
| 35 | + int bad_count = 0; | |
| 36 | + int good_count = 0; | |
| 37 | + bool b_contents = false; | |
| 38 | + std::vector<std::string> contents_string_stack; | |
| 39 | + contents_string_stack.push_back(""); | |
| 40 | + std::vector<qpdf_offset_t> contents_offset_stack; | |
| 41 | + contents_offset_stack.push_back(-1); | |
| 42 | + while (!done) { | |
| 43 | + bool bad = false; | |
| 44 | + SparseOHArray& olist = olist_stack.back(); | |
| 45 | + parser_state_e state = state_stack.back(); | |
| 46 | + offset = offset_stack.back(); | |
| 47 | + std::string& contents_string = contents_string_stack.back(); | |
| 48 | + qpdf_offset_t& contents_offset = contents_offset_stack.back(); | |
| 49 | + | |
| 50 | + object = QPDFObjectHandle(); | |
| 51 | + set_offset = false; | |
| 52 | + | |
| 53 | + QPDFTokenizer::Token token = | |
| 54 | + tokenizer.readToken(input, object_description, true); | |
| 55 | + std::string const& token_error_message = token.getErrorMessage(); | |
| 56 | + if (!token_error_message.empty()) { | |
| 57 | + // Tokens other than tt_bad can still generate warnings. | |
| 58 | + warn( | |
| 59 | + context, | |
| 60 | + QPDFExc( | |
| 61 | + qpdf_e_damaged_pdf, | |
| 62 | + input->getName(), | |
| 63 | + object_description, | |
| 64 | + input->getLastOffset(), | |
| 65 | + token_error_message)); | |
| 66 | + } | |
| 67 | + | |
| 68 | + switch (token.getType()) { | |
| 69 | + case QPDFTokenizer::tt_eof: | |
| 70 | + if (!content_stream) { | |
| 71 | + QTC::TC("qpdf", "QPDFParser eof in parse"); | |
| 72 | + warn( | |
| 73 | + context, | |
| 74 | + QPDFExc( | |
| 75 | + qpdf_e_damaged_pdf, | |
| 76 | + input->getName(), | |
| 77 | + object_description, | |
| 78 | + input->getLastOffset(), | |
| 79 | + "unexpected EOF")); | |
| 80 | + } | |
| 81 | + bad = true; | |
| 82 | + state = st_eof; | |
| 83 | + break; | |
| 84 | + | |
| 85 | + case QPDFTokenizer::tt_bad: | |
| 86 | + QTC::TC("qpdf", "QPDFParser bad token in parse"); | |
| 87 | + bad = true; | |
| 88 | + object = QPDFObjectHandle::newNull(); | |
| 89 | + break; | |
| 90 | + | |
| 91 | + case QPDFTokenizer::tt_brace_open: | |
| 92 | + case QPDFTokenizer::tt_brace_close: | |
| 93 | + QTC::TC("qpdf", "QPDFParser bad brace"); | |
| 94 | + warn( | |
| 95 | + context, | |
| 96 | + QPDFExc( | |
| 97 | + qpdf_e_damaged_pdf, | |
| 98 | + input->getName(), | |
| 99 | + object_description, | |
| 100 | + input->getLastOffset(), | |
| 101 | + "treating unexpected brace token as null")); | |
| 102 | + bad = true; | |
| 103 | + object = QPDFObjectHandle::newNull(); | |
| 104 | + break; | |
| 105 | + | |
| 106 | + case QPDFTokenizer::tt_array_close: | |
| 107 | + if (state == st_array) { | |
| 108 | + state = st_stop; | |
| 109 | + } else { | |
| 110 | + QTC::TC("qpdf", "QPDFParser bad array close"); | |
| 111 | + warn( | |
| 112 | + context, | |
| 113 | + QPDFExc( | |
| 114 | + qpdf_e_damaged_pdf, | |
| 115 | + input->getName(), | |
| 116 | + object_description, | |
| 117 | + input->getLastOffset(), | |
| 118 | + "treating unexpected array close token as null")); | |
| 119 | + bad = true; | |
| 120 | + object = QPDFObjectHandle::newNull(); | |
| 121 | + } | |
| 122 | + break; | |
| 123 | + | |
| 124 | + case QPDFTokenizer::tt_dict_close: | |
| 125 | + if (state == st_dictionary) { | |
| 126 | + state = st_stop; | |
| 127 | + } else { | |
| 128 | + QTC::TC("qpdf", "QPDFParser bad dictionary close"); | |
| 129 | + warn( | |
| 130 | + context, | |
| 131 | + QPDFExc( | |
| 132 | + qpdf_e_damaged_pdf, | |
| 133 | + input->getName(), | |
| 134 | + object_description, | |
| 135 | + input->getLastOffset(), | |
| 136 | + "unexpected dictionary close token")); | |
| 137 | + bad = true; | |
| 138 | + object = QPDFObjectHandle::newNull(); | |
| 139 | + } | |
| 140 | + break; | |
| 141 | + | |
| 142 | + case QPDFTokenizer::tt_array_open: | |
| 143 | + case QPDFTokenizer::tt_dict_open: | |
| 144 | + if (olist_stack.size() > 500) { | |
| 145 | + QTC::TC("qpdf", "QPDFParser too deep"); | |
| 146 | + warn( | |
| 147 | + context, | |
| 148 | + QPDFExc( | |
| 149 | + qpdf_e_damaged_pdf, | |
| 150 | + input->getName(), | |
| 151 | + object_description, | |
| 152 | + input->getLastOffset(), | |
| 153 | + "ignoring excessively deeply nested data structure")); | |
| 154 | + bad = true; | |
| 155 | + object = QPDFObjectHandle::newNull(); | |
| 156 | + state = st_top; | |
| 157 | + } else { | |
| 158 | + olist_stack.push_back(SparseOHArray()); | |
| 159 | + state = st_start; | |
| 160 | + offset_stack.push_back(input->tell()); | |
| 161 | + state_stack.push_back( | |
| 162 | + (token.getType() == QPDFTokenizer::tt_array_open) | |
| 163 | + ? st_array | |
| 164 | + : st_dictionary); | |
| 165 | + b_contents = false; | |
| 166 | + contents_string_stack.push_back(""); | |
| 167 | + contents_offset_stack.push_back(-1); | |
| 168 | + } | |
| 169 | + break; | |
| 170 | + | |
| 171 | + case QPDFTokenizer::tt_bool: | |
| 172 | + object = QPDFObjectHandle::newBool((token.getValue() == "true")); | |
| 173 | + break; | |
| 174 | + | |
| 175 | + case QPDFTokenizer::tt_null: | |
| 176 | + object = QPDFObjectHandle::newNull(); | |
| 177 | + break; | |
| 178 | + | |
| 179 | + case QPDFTokenizer::tt_integer: | |
| 180 | + object = QPDFObjectHandle::newInteger( | |
| 181 | + QUtil::string_to_ll(token.getValue().c_str())); | |
| 182 | + break; | |
| 183 | + | |
| 184 | + case QPDFTokenizer::tt_real: | |
| 185 | + object = QPDFObjectHandle::newReal(token.getValue()); | |
| 186 | + break; | |
| 187 | + | |
| 188 | + case QPDFTokenizer::tt_name: | |
| 189 | + { | |
| 190 | + std::string name = token.getValue(); | |
| 191 | + object = QPDFObjectHandle::newName(name); | |
| 192 | + | |
| 193 | + if (name == "/Contents") { | |
| 194 | + b_contents = true; | |
| 195 | + } else { | |
| 196 | + b_contents = false; | |
| 197 | + } | |
| 198 | + } | |
| 199 | + break; | |
| 200 | + | |
| 201 | + case QPDFTokenizer::tt_word: | |
| 202 | + { | |
| 203 | + std::string const& value = token.getValue(); | |
| 204 | + if (content_stream) { | |
| 205 | + object = QPDFObjectHandle::newOperator(value); | |
| 206 | + } else if ( | |
| 207 | + (value == "R") && (state != st_top) && | |
| 208 | + (olist.size() >= 2) && | |
| 209 | + (!olist.at(olist.size() - 1).isIndirect()) && | |
| 210 | + (olist.at(olist.size() - 1).isInteger()) && | |
| 211 | + (!olist.at(olist.size() - 2).isIndirect()) && | |
| 212 | + (olist.at(olist.size() - 2).isInteger())) { | |
| 213 | + if (context == nullptr) { | |
| 214 | + QTC::TC("qpdf", "QPDFParser indirect without context"); | |
| 215 | + throw std::logic_error( | |
| 216 | + "QPDFObjectHandle::parse called without context" | |
| 217 | + " on an object with indirect references"); | |
| 218 | + } | |
| 219 | + // Try to resolve indirect objects | |
| 220 | + object = QPDFObjectHandle::newIndirect( | |
| 221 | + context, | |
| 222 | + QPDFObjGen( | |
| 223 | + olist.at(olist.size() - 2).getIntValueAsInt(), | |
| 224 | + olist.at(olist.size() - 1).getIntValueAsInt())); | |
| 225 | + olist.remove_last(); | |
| 226 | + olist.remove_last(); | |
| 227 | + } else if ((value == "endobj") && (state == st_top)) { | |
| 228 | + // We just saw endobj without having read | |
| 229 | + // anything. Treat this as a null and do not move | |
| 230 | + // the input source's offset. | |
| 231 | + object = QPDFObjectHandle::newNull(); | |
| 232 | + input->seek(input->getLastOffset(), SEEK_SET); | |
| 233 | + empty = true; | |
| 234 | + } else { | |
| 235 | + QTC::TC("qpdf", "QPDFParser treat word as string"); | |
| 236 | + warn( | |
| 237 | + context, | |
| 238 | + QPDFExc( | |
| 239 | + qpdf_e_damaged_pdf, | |
| 240 | + input->getName(), | |
| 241 | + object_description, | |
| 242 | + input->getLastOffset(), | |
| 243 | + "unknown token while reading object;" | |
| 244 | + " treating as string")); | |
| 245 | + bad = true; | |
| 246 | + object = QPDFObjectHandle::newString(value); | |
| 247 | + } | |
| 248 | + } | |
| 249 | + break; | |
| 250 | + | |
| 251 | + case QPDFTokenizer::tt_string: | |
| 252 | + { | |
| 253 | + std::string val = token.getValue(); | |
| 254 | + if (decrypter) { | |
| 255 | + if (b_contents) { | |
| 256 | + contents_string = val; | |
| 257 | + contents_offset = input->getLastOffset(); | |
| 258 | + b_contents = false; | |
| 259 | + } | |
| 260 | + decrypter->decryptString(val); | |
| 261 | + } | |
| 262 | + object = QPDFObjectHandle::newString(val); | |
| 263 | + } | |
| 264 | + | |
| 265 | + break; | |
| 266 | + | |
| 267 | + default: | |
| 268 | + warn( | |
| 269 | + context, | |
| 270 | + QPDFExc( | |
| 271 | + qpdf_e_damaged_pdf, | |
| 272 | + input->getName(), | |
| 273 | + object_description, | |
| 274 | + input->getLastOffset(), | |
| 275 | + "treating unknown token type as null while " | |
| 276 | + "reading object")); | |
| 277 | + bad = true; | |
| 278 | + object = QPDFObjectHandle::newNull(); | |
| 279 | + break; | |
| 280 | + } | |
| 281 | + | |
| 282 | + if ((!object.isInitialized()) && | |
| 283 | + (!((state == st_start) || (state == st_stop) || | |
| 284 | + (state == st_eof)))) { | |
| 285 | + throw std::logic_error("QPDFObjectHandle::parseInternal: " | |
| 286 | + "unexpected uninitialized object"); | |
| 287 | + object = QPDFObjectHandle::newNull(); | |
| 288 | + } | |
| 289 | + | |
| 290 | + if (bad) { | |
| 291 | + ++bad_count; | |
| 292 | + good_count = 0; | |
| 293 | + } else { | |
| 294 | + ++good_count; | |
| 295 | + if (good_count > 3) { | |
| 296 | + bad_count = 0; | |
| 297 | + } | |
| 298 | + } | |
| 299 | + if (bad_count > 5) { | |
| 300 | + // We had too many consecutive errors without enough | |
| 301 | + // intervening successful objects. Give up. | |
| 302 | + warn( | |
| 303 | + context, | |
| 304 | + QPDFExc( | |
| 305 | + qpdf_e_damaged_pdf, | |
| 306 | + input->getName(), | |
| 307 | + object_description, | |
| 308 | + input->getLastOffset(), | |
| 309 | + "too many errors; giving up on reading object")); | |
| 310 | + state = st_top; | |
| 311 | + object = QPDFObjectHandle::newNull(); | |
| 312 | + } | |
| 313 | + | |
| 314 | + switch (state) { | |
| 315 | + case st_eof: | |
| 316 | + if (state_stack.size() > 1) { | |
| 317 | + warn( | |
| 318 | + context, | |
| 319 | + QPDFExc( | |
| 320 | + qpdf_e_damaged_pdf, | |
| 321 | + input->getName(), | |
| 322 | + object_description, | |
| 323 | + input->getLastOffset(), | |
| 324 | + "parse error while reading object")); | |
| 325 | + } | |
| 326 | + done = true; | |
| 327 | + // In content stream mode, leave object uninitialized to | |
| 328 | + // indicate EOF | |
| 329 | + if (!content_stream) { | |
| 330 | + object = QPDFObjectHandle::newNull(); | |
| 331 | + } | |
| 332 | + break; | |
| 333 | + | |
| 334 | + case st_dictionary: | |
| 335 | + case st_array: | |
| 336 | + QPDFObjectHandle::setObjectDescriptionFromInput( | |
| 337 | + object, | |
| 338 | + context, | |
| 339 | + object_description, | |
| 340 | + input, | |
| 341 | + input->getLastOffset()); | |
| 342 | + object.setParsedOffset(input->getLastOffset()); | |
| 343 | + set_offset = true; | |
| 344 | + olist.append(object); | |
| 345 | + break; | |
| 346 | + | |
| 347 | + case st_top: | |
| 348 | + done = true; | |
| 349 | + break; | |
| 350 | + | |
| 351 | + case st_start: | |
| 352 | + break; | |
| 353 | + | |
| 354 | + case st_stop: | |
| 355 | + if ((state_stack.size() < 2) || (olist_stack.size() < 2)) { | |
| 356 | + throw std::logic_error( | |
| 357 | + "QPDFObjectHandle::parseInternal: st_stop encountered" | |
| 358 | + " with insufficient elements in stack"); | |
| 359 | + } | |
| 360 | + parser_state_e old_state = state_stack.back(); | |
| 361 | + state_stack.pop_back(); | |
| 362 | + if (old_state == st_array) { | |
| 363 | + // There's no newArray(SparseOHArray) since | |
| 364 | + // SparseOHArray is not part of the public API. | |
| 365 | + object = QPDFObjectHandle(QPDF_Array::create(olist)); | |
| 366 | + QPDFObjectHandle::setObjectDescriptionFromInput( | |
| 367 | + object, context, object_description, input, offset); | |
| 368 | + // The `offset` points to the next of "[". Set the | |
| 369 | + // rewind offset to point to the beginning of "[". | |
| 370 | + // This has been explicitly tested with whitespace | |
| 371 | + // surrounding the array start delimiter. | |
| 372 | + // getLastOffset points to the array end token and | |
| 373 | + // therefore can't be used here. | |
| 374 | + object.setParsedOffset(offset - 1); | |
| 375 | + set_offset = true; | |
| 376 | + } else if (old_state == st_dictionary) { | |
| 377 | + // Convert list to map. Alternating elements are keys. | |
| 378 | + // Attempt to recover more or less gracefully from | |
| 379 | + // invalid dictionaries. | |
| 380 | + std::set<std::string> names; | |
| 381 | + size_t n_elements = olist.size(); | |
| 382 | + for (size_t i = 0; i < n_elements; ++i) { | |
| 383 | + QPDFObjectHandle oh = olist.at(i); | |
| 384 | + if ((!oh.isIndirect()) && oh.isName()) { | |
| 385 | + names.insert(oh.getName()); | |
| 386 | + } | |
| 387 | + } | |
| 388 | + | |
| 389 | + std::map<std::string, QPDFObjectHandle> dict; | |
| 390 | + int next_fake_key = 1; | |
| 391 | + for (unsigned int i = 0; i < olist.size(); ++i) { | |
| 392 | + QPDFObjectHandle key_obj = olist.at(i); | |
| 393 | + QPDFObjectHandle val; | |
| 394 | + if (key_obj.isIndirect() || (!key_obj.isName())) { | |
| 395 | + bool found_fake = false; | |
| 396 | + std::string candidate; | |
| 397 | + while (!found_fake) { | |
| 398 | + candidate = "/QPDFFake" + | |
| 399 | + QUtil::int_to_string(next_fake_key++); | |
| 400 | + found_fake = (names.count(candidate) == 0); | |
| 401 | + QTC::TC( | |
| 402 | + "qpdf", | |
| 403 | + "QPDFParser found fake", | |
| 404 | + (found_fake ? 0 : 1)); | |
| 405 | + } | |
| 406 | + warn( | |
| 407 | + context, | |
| 408 | + QPDFExc( | |
| 409 | + qpdf_e_damaged_pdf, | |
| 410 | + input->getName(), | |
| 411 | + object_description, | |
| 412 | + offset, | |
| 413 | + "expected dictionary key but found" | |
| 414 | + " non-name object; inserting key " + | |
| 415 | + candidate)); | |
| 416 | + val = key_obj; | |
| 417 | + key_obj = QPDFObjectHandle::newName(candidate); | |
| 418 | + } else if (i + 1 >= olist.size()) { | |
| 419 | + QTC::TC("qpdf", "QPDFParser no val for last key"); | |
| 420 | + warn( | |
| 421 | + context, | |
| 422 | + QPDFExc( | |
| 423 | + qpdf_e_damaged_pdf, | |
| 424 | + input->getName(), | |
| 425 | + object_description, | |
| 426 | + offset, | |
| 427 | + "dictionary ended prematurely; " | |
| 428 | + "using null as value for last key")); | |
| 429 | + val = QPDFObjectHandle::newNull(); | |
| 430 | + QPDFObjectHandle::setObjectDescriptionFromInput( | |
| 431 | + val, context, object_description, input, offset); | |
| 432 | + } else { | |
| 433 | + val = olist.at(++i); | |
| 434 | + } | |
| 435 | + std::string key = key_obj.getName(); | |
| 436 | + if (dict.count(key) > 0) { | |
| 437 | + QTC::TC("qpdf", "QPDFParser duplicate dict key"); | |
| 438 | + warn( | |
| 439 | + context, | |
| 440 | + QPDFExc( | |
| 441 | + qpdf_e_damaged_pdf, | |
| 442 | + input->getName(), | |
| 443 | + object_description, | |
| 444 | + offset, | |
| 445 | + "dictionary has duplicated key " + key + | |
| 446 | + "; last occurrence overrides earlier " | |
| 447 | + "ones")); | |
| 448 | + } | |
| 449 | + dict[key] = val; | |
| 450 | + } | |
| 451 | + if (!contents_string.empty() && dict.count("/Type") && | |
| 452 | + dict["/Type"].isNameAndEquals("/Sig") && | |
| 453 | + dict.count("/ByteRange") && dict.count("/Contents") && | |
| 454 | + dict["/Contents"].isString()) { | |
| 455 | + dict["/Contents"] = | |
| 456 | + QPDFObjectHandle::newString(contents_string); | |
| 457 | + dict["/Contents"].setParsedOffset(contents_offset); | |
| 458 | + } | |
| 459 | + object = QPDFObjectHandle::newDictionary(dict); | |
| 460 | + QPDFObjectHandle::setObjectDescriptionFromInput( | |
| 461 | + object, context, object_description, input, offset); | |
| 462 | + // The `offset` points to the next of "<<". Set the | |
| 463 | + // rewind offset to point to the beginning of "<<". | |
| 464 | + // This has been explicitly tested with whitespace | |
| 465 | + // surrounding the dictionary start delimiter. | |
| 466 | + // getLastOffset points to the dictionary end token | |
| 467 | + // and therefore can't be used here. | |
| 468 | + object.setParsedOffset(offset - 2); | |
| 469 | + set_offset = true; | |
| 470 | + } | |
| 471 | + olist_stack.pop_back(); | |
| 472 | + offset_stack.pop_back(); | |
| 473 | + if (state_stack.back() == st_top) { | |
| 474 | + done = true; | |
| 475 | + } else { | |
| 476 | + olist_stack.back().append(object); | |
| 477 | + } | |
| 478 | + contents_string_stack.pop_back(); | |
| 479 | + contents_offset_stack.pop_back(); | |
| 480 | + } | |
| 481 | + } | |
| 482 | + | |
| 483 | + if (!set_offset) { | |
| 484 | + QPDFObjectHandle::setObjectDescriptionFromInput( | |
| 485 | + object, context, object_description, input, offset); | |
| 486 | + object.setParsedOffset(offset); | |
| 487 | + } | |
| 488 | + return object; | |
| 489 | +} | |
| 490 | + | |
| 491 | +void | |
| 492 | +QPDFParser::warn(QPDF* qpdf, QPDFExc const& e) | |
| 493 | +{ | |
| 494 | + // If parsing on behalf of a QPDF object and want to give a | |
| 495 | + // warning, we can warn through the object. If parsing for some | |
| 496 | + // other reason, such as an explicit creation of an object from a | |
| 497 | + // string, then just throw the exception. | |
| 498 | + if (qpdf) { | |
| 499 | + qpdf->warn(e); | |
| 500 | + } else { | |
| 501 | + throw e; | |
| 502 | + } | |
| 503 | +} | ... | ... |
libqpdf/qpdf/QPDFParser.hh
0 → 100644
| 1 | +#ifndef QPDFPARSER_HH | |
| 2 | +#define QPDFPARSER_HH | |
| 3 | + | |
| 4 | +#include <qpdf/QPDFObjectHandle.hh> | |
| 5 | + | |
| 6 | +#include <memory> | |
| 7 | +#include <string> | |
| 8 | + | |
| 9 | +class QPDFParser | |
| 10 | +{ | |
| 11 | + public: | |
| 12 | + QPDFParser() = delete; | |
| 13 | + QPDFParser( | |
| 14 | + std::shared_ptr<InputSource> input, | |
| 15 | + std::string const& object_description, | |
| 16 | + QPDFTokenizer& tokenizer, | |
| 17 | + QPDFObjectHandle::StringDecrypter* decrypter, | |
| 18 | + QPDF* context) : | |
| 19 | + input(input), | |
| 20 | + object_description(object_description), | |
| 21 | + tokenizer(tokenizer), | |
| 22 | + decrypter(decrypter), | |
| 23 | + context(context) | |
| 24 | + { | |
| 25 | + } | |
| 26 | + virtual ~QPDFParser() = default; | |
| 27 | + | |
| 28 | + QPDFObjectHandle parse(bool& empty, bool content_stream); | |
| 29 | + | |
| 30 | + private: | |
| 31 | + enum parser_state_e { | |
| 32 | + st_top, | |
| 33 | + st_start, | |
| 34 | + st_stop, | |
| 35 | + st_eof, | |
| 36 | + st_dictionary, | |
| 37 | + st_array | |
| 38 | + }; | |
| 39 | + | |
| 40 | + static void warn(QPDF*, QPDFExc const&); | |
| 41 | + void setParsedOffset(qpdf_offset_t offset); | |
| 42 | + | |
| 43 | + std::shared_ptr<InputSource> input; | |
| 44 | + std::string const& object_description; | |
| 45 | + QPDFTokenizer& tokenizer; | |
| 46 | + QPDFObjectHandle::StringDecrypter* decrypter; | |
| 47 | + QPDF* context; | |
| 48 | +}; | |
| 49 | + | |
| 50 | +#endif // QPDFPARSER_HH | ... | ... |
qpdf/qpdf.testcov
| ... | ... | @@ -56,12 +56,12 @@ QPDF missing trailer 0 |
| 56 | 56 | QPDF trailer lacks size 0 |
| 57 | 57 | QPDF trailer size not integer 0 |
| 58 | 58 | QPDF trailer prev not integer 0 |
| 59 | -QPDFObjectHandle bad brace 0 | |
| 60 | -QPDFObjectHandle bad array close 0 | |
| 59 | +QPDFParser bad brace 0 | |
| 60 | +QPDFParser bad array close 0 | |
| 61 | 61 | QPDF stream without length 0 |
| 62 | 62 | QPDF stream length not integer 0 |
| 63 | 63 | QPDF missing endstream 0 |
| 64 | -QPDFObjectHandle bad dictionary close 0 | |
| 64 | +QPDFParser bad dictionary close 0 | |
| 65 | 65 | QPDF can't find xref 0 |
| 66 | 66 | QPDFTokenizer bad ) 0 |
| 67 | 67 | QPDFTokenizer bad > 0 |
| ... | ... | @@ -215,7 +215,7 @@ QPDF not copying pages object 0 |
| 215 | 215 | QPDF insert foreign page 0 |
| 216 | 216 | QPDFWriter foreign object 0 |
| 217 | 217 | QPDFWriter copy use_aes 1 |
| 218 | -QPDFObjectHandle indirect without context 0 | |
| 218 | +QPDFParser indirect without context 0 | |
| 219 | 219 | QPDFObjectHandle trailing data in parse 0 |
| 220 | 220 | QPDFJob pages encryption password 0 |
| 221 | 221 | QPDFTokenizer EOF reading token 0 |
| ... | ... | @@ -257,9 +257,9 @@ qpdf-c called qpdf_set_deterministic_ID 0 |
| 257 | 257 | QPDFObjectHandle indirect with 0 objid 0 |
| 258 | 258 | QPDF object id 0 0 |
| 259 | 259 | QPDF recursion loop in resolve 0 |
| 260 | -QPDFObjectHandle treat word as string 0 | |
| 261 | -QPDFObjectHandle found fake 1 | |
| 262 | -QPDFObjectHandle no val for last key 0 | |
| 260 | +QPDFParser treat word as string 0 | |
| 261 | +QPDFParser found fake 1 | |
| 262 | +QPDFParser no val for last key 0 | |
| 263 | 263 | QPDF resolve failure to null 0 |
| 264 | 264 | QPDFWriter preserve unreferenced standard 0 |
| 265 | 265 | QPDFObjectHandle errors in parsecontent 0 |
| ... | ... | @@ -288,8 +288,8 @@ QPDFObjectHandle non-stream in stream array 0 |
| 288 | 288 | QPDFObjectHandle coalesce called on stream 0 |
| 289 | 289 | QPDFObjectHandle coalesce provide stream data 0 |
| 290 | 290 | QPDF_Stream bad token at end during normalize 0 |
| 291 | -QPDFObjectHandle bad token in parse 0 | |
| 292 | -QPDFObjectHandle eof in parseInternal 0 | |
| 291 | +QPDFParser bad token in parse 0 | |
| 292 | +QPDFParser eof in parse 0 | |
| 293 | 293 | QPDFObjectHandle array bounds 0 |
| 294 | 294 | QPDFObjectHandle boolean returning false 0 |
| 295 | 295 | QPDFObjectHandle integer returning 0 0 |
| ... | ... | @@ -317,7 +317,7 @@ QPDFObjectHandle numeric non-numeric 0 |
| 317 | 317 | QPDFObjectHandle erase array bounds 0 |
| 318 | 318 | qpdf-c called qpdf_check_pdf 0 |
| 319 | 319 | QPDF xref loop 0 |
| 320 | -QPDFObjectHandle too deep 0 | |
| 320 | +QPDFParser too deep 0 | |
| 321 | 321 | QPDFFormFieldObjectHelper non-trivial inheritance 0 |
| 322 | 322 | QPDFFormFieldObjectHelper non-trivial qualified name 0 |
| 323 | 323 | QPDFFormFieldObjectHelper TU present 0 |
| ... | ... | @@ -428,7 +428,7 @@ QPDF eof skipping spaces before xref 1 |
| 428 | 428 | QPDF_encryption user matches owner V < 5 0 |
| 429 | 429 | QPDF_encryption same password 1 |
| 430 | 430 | QPDFWriter stream in ostream 0 |
| 431 | -QPDFObjectHandle duplicate dict key 0 | |
| 431 | +QPDFParser duplicate dict key 0 | |
| 432 | 432 | QPDFWriter no encryption sig contents 0 |
| 433 | 433 | QPDFPageObjectHelper colorspace lookup 0 |
| 434 | 434 | QPDFWriter ignore XRef in qdf mode 0 | ... | ... |