Pl_QPDFTokenizer.cc
1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/BufferInputSource.hh>
#include <qpdf/QTC.hh>
#include <stdexcept>
// Private state for Pl_QPDFTokenizer, held through the pipeline's `m` pointer.
class Pl_QPDFTokenizer::Members
{
  public:
    // Filter that receives each token from finish(). Held as a raw pointer; this class never
    // deletes it.
    QPDFObjectHandle::TokenFilter* filter = nullptr;
    // Tokenizer used by finish() to split the buffered data into tokens.
    QPDFTokenizer tokenizer;
    // Collects everything passed to write() until finish() is called.
    Pl_Buffer buf{"tokenizer buffer"};

    Members() = default;
    ~Members() = default;
    // Not copyable.
    Members(Members const&) = delete;
};
// Construct a tokenizing pipeline.
//
// identifier and next are forwarded to the Pipeline base class. filter is stored and later
// receives every token produced by finish(); it is also handed `next` through
// PipelineAccessor::setPipeline (presumably so the filter can write its output downstream --
// confirm against TokenFilter).
Pl_QPDFTokenizer::Pl_QPDFTokenizer(
    char const* identifier, QPDFObjectHandle::TokenFilter* filter, Pipeline* next) :
    Pipeline(identifier, next),
    m(std::make_unique<Members>())
{
    m->filter = filter;
    QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline(filter, next);

    // finish() loops until it sees a tt_eof token, so the tokenizer is told to allow EOF.
    // includeIgnorable() presumably makes it also return whitespace and comments -- confirm
    // against QPDFTokenizer.
    auto& tokenizer = m->tokenizer;
    tokenizer.allowEOF();
    tokenizer.includeIgnorable();
}
// Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
// Defaulted out of line in this file, after Members has been fully defined above.
Pl_QPDFTokenizer::~Pl_QPDFTokenizer() = default;
// Append len bytes starting at data to the internal buffer. Nothing is tokenized or passed
// downstream here; all of that is deferred to finish().
void
Pl_QPDFTokenizer::write(unsigned char const* data, size_t len)
{
    auto& pending = m->buf;
    pending.write(data, len);
}
void
Pl_QPDFTokenizer::finish()
{
m->buf.finish();
auto input = BufferInputSource("tokenizer data", m->buf.getBuffer(), true);
std::string empty;
while (true) {
auto token = m->tokenizer.readToken(input, empty, true);
m->filter->handleToken(token);
if (token.getType() == QPDFTokenizer::tt_eof) {
break;
} else if (token.isWord("ID")) {
// Read the space after the ID.
char ch = ' ';
input.read(&ch, 1);
m->filter->handleToken(
// line-break
QPDFTokenizer::Token(QPDFTokenizer::tt_space, std::string(1, ch)));
QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
m->tokenizer.expectInlineImage(input);
}
}
m->filter->handleEOF();
QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline(m->filter, nullptr);
if (next()) {
next()->finish();
}
}