Commit 54e379d085866c4e95fc359792b96b6a6764211b

Authored by m-holger
Committed by GitHub
2 parents a4b87050 f54d98ba

Merge pull request #1305 from m-holger/is

Refactor InputSource
CMakeLists.txt
... ... @@ -7,7 +7,7 @@ cmake_minimum_required(VERSION 3.16)
7 7 # also find the version number here. generate_auto_job also reads the
8 8 # version from here.
9 9 project(qpdf
10   - VERSION 12.0.0
  10 + VERSION 12.0.1
11 11 LANGUAGES C CXX)
12 12  
13 13 # Enable correct rpath handling for MacOSX
... ...
include/qpdf/DLL.h
... ... @@ -27,12 +27,12 @@
27 27 /* The first version of qpdf to include the version constants is 10.6.0. */
28 28 #define QPDF_MAJOR_VERSION 12
29 29 #define QPDF_MINOR_VERSION 0
30   -#define QPDF_PATCH_VERSION 0
  30 +#define QPDF_PATCH_VERSION 1
31 31  
32 32 #ifdef QPDF_FUTURE
33   -# define QPDF_VERSION "12.0.0+future"
  33 +# define QPDF_VERSION "12.0.1+future"
34 34 #else
35   -# define QPDF_VERSION "12.0.0"
  35 +# define QPDF_VERSION "12.0.1"
36 36 #endif
37 37  
38 38 /*
... ...
include/qpdf/InputSource.hh
... ... @@ -75,7 +75,11 @@ class QPDF_DLL_CLASS InputSource
75 75 // semantically equivalent to seek(-1, SEEK_CUR) but is much more efficient.
76 76 virtual void unreadCh(char ch) = 0;
77 77  
78   - // The following methods are for use by QPDFTokenizer
  78 + // The following methods are for internal use by qpdf only.
  79 + inline size_t read(std::string& str, size_t count, qpdf_offset_t at = -1);
  80 + inline std::string read(size_t count, qpdf_offset_t at = -1);
  81 + size_t read_line(std::string& str, size_t count, qpdf_offset_t at = -1);
  82 + std::string read_line(size_t count, qpdf_offset_t at = -1);
79 83 inline qpdf_offset_t fastTell();
80 84 inline bool fastRead(char&);
81 85 inline void fastUnread(bool);
... ... @@ -93,57 +97,4 @@ class QPDF_DLL_CLASS InputSource
93 97 qpdf_offset_t buf_start = 0;
94 98 };
95 99  
96   -inline void
97   -InputSource::loadBuffer()
98   -{
99   - this->buf_idx = 0;
100   - this->buf_len = qpdf_offset_t(read(this->buffer, this->buf_size));
101   - // NB read sets last_offset
102   - this->buf_start = this->last_offset;
103   -}
104   -
105   -inline qpdf_offset_t
106   -InputSource::fastTell()
107   -{
108   - if (this->buf_len == 0) {
109   - loadBuffer();
110   - } else {
111   - auto curr = tell();
112   - if (curr < this->buf_start || curr >= (this->buf_start + this->buf_len)) {
113   - loadBuffer();
114   - } else {
115   - this->last_offset = curr;
116   - this->buf_idx = curr - this->buf_start;
117   - }
118   - }
119   - return this->last_offset;
120   -}
121   -
122   -inline bool
123   -InputSource::fastRead(char& ch)
124   -{
125   - // Before calling fastRead, fastTell must be called to prepare the buffer. Once reading is
126   - // complete, fastUnread must be called to set the correct file position.
127   - if (this->buf_idx < this->buf_len) {
128   - ch = this->buffer[this->buf_idx];
129   - ++(this->buf_idx);
130   - ++(this->last_offset);
131   - return true;
132   -
133   - } else if (this->buf_len == 0) {
134   - return false;
135   - } else {
136   - seek(this->buf_start + this->buf_len, SEEK_SET);
137   - fastTell();
138   - return fastRead(ch);
139   - }
140   -}
141   -
142   -inline void
143   -InputSource::fastUnread(bool back)
144   -{
145   - this->last_offset -= back ? 1 : 0;
146   - seek(this->last_offset, SEEK_SET);
147   -}
148   -
149 100 #endif // QPDF_INPUTSOURCE_HH
... ...
job.sums
1 1 # Generated by generate_auto_job
2   -CMakeLists.txt 88e8974a8b14e10c941a4bb04ff078c3d3063b98af3ea056e02b1dcdff783d22
  2 +CMakeLists.txt 7779469688d17b58dfe69f2af4e5627eb20defd72a95ca71f2ecee68f1ec6d97
3 3 generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a86
4 4 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4
5 5 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42
... ... @@ -16,5 +16,5 @@ libqpdf/qpdf/auto_job_json_init.hh 344c2fb473f88fe829c93b1efe6c70a0e4796537b8eb3
16 16 libqpdf/qpdf/auto_job_schema.hh 6d3eef5137b8828eaa301a1b3cf75cb7bb812aa6e2d8301de865b42d238d7a7c
17 17 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
18 18 manual/cli.rst 67357688f9a52fafa9a4f231fe4ce74c3cd8977130da7501efe54439a1ee22d4
19   -manual/qpdf.1 78bad33f9b3f246f1800bce365f7be06d3545d89f08b8923dd8489031b5af43e
  19 +manual/qpdf.1 dbcc567623f1fa080743ae9bc32b6264a3b6bd3074c81c438e52ca328e94ecd7
20 20 manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b
... ...
libqpdf/InputSource.cc
1   -#include <qpdf/InputSource.hh>
  1 +#include <qpdf/InputSource_private.hh>
2 2  
3 3 #include <qpdf/QIntC.hh>
4 4 #include <qpdf/QTC.hh>
5 5 #include <cstring>
6 6 #include <stdexcept>
7 7  
  8 +using namespace std::literals;
  9 +
8 10 void
9 11 InputSource::setLastOffset(qpdf_offset_t offset)
10 12 {
... ... @@ -17,27 +19,42 @@ InputSource::getLastOffset() const
17 19 return this->last_offset;
18 20 }
19 21  
20   -std::string
21   -InputSource::readLine(size_t max_line_length)
  22 +size_t
  23 +InputSource::read_line(std::string& str, size_t count, qpdf_offset_t at)
22 24 {
23 25 // Return at most max_line_length characters from the next line. Lines are terminated by one or
24 26 // more \r or \n characters. Consume the trailing newline characters but don't return them.
25 27 // After this is called, the file will be positioned after a line terminator or at the end of
26 28 // the file, and last_offset will point to position the file had when this method was called.
27 29  
28   - qpdf_offset_t offset = this->tell();
29   - auto bp = std::make_unique<char[]>(max_line_length + 1);
30   - char* buf = bp.get();
31   - memset(buf, '\0', max_line_length + 1);
32   - this->read(buf, max_line_length);
33   - this->seek(offset, SEEK_SET);
34   - qpdf_offset_t eol = this->findAndSkipNextEOL();
35   - this->last_offset = offset;
36   - size_t line_length = QIntC::to_size(eol - offset);
37   - if (line_length < max_line_length) {
38   - buf[line_length] = '\0';
  30 + read(str, count, at);
  31 + auto eol = str.find_first_of("\n\r"sv);
  32 + if (eol != std::string::npos) {
  33 + auto next_line = str.find_first_not_of("\n\r"sv, eol);
  34 + str.resize(eol);
  35 + if (eol != std::string::npos) {
  36 + seek(last_offset + static_cast<qpdf_offset_t>(next_line), SEEK_SET);
  37 + return eol;
  38 + }
39 39 }
40   - return {buf};
  40 + // We did not necessarily find the end of the trailing newline sequence.
  41 + seek(last_offset, SEEK_SET);
  42 + findAndSkipNextEOL();
  43 + return eol;
  44 +}
  45 +
  46 + std::string
  47 + InputSource::readLine(size_t max_line_length)
  48 + {
  49 + return read_line(max_line_length); // forwards to read_line(count, at); at takes its declared default of -1
  50 + }
  51 +
  52 +inline std::string
  53 +InputSource::read_line(size_t count, qpdf_offset_t at)
  54 +{
  55 + std::string result(count, '\0');
  56 + read_line(result, count, at);
  57 + return result;
41 58 }
42 59  
43 60 bool
... ...
libqpdf/QPDF.cc
... ... @@ -13,6 +13,7 @@
13 13  
14 14 #include <qpdf/BufferInputSource.hh>
15 15 #include <qpdf/FileInputSource.hh>
  16 +#include <qpdf/InputSource_private.hh>
16 17 #include <qpdf/OffsetInputSource.hh>
17 18 #include <qpdf/Pipeline.hh>
18 19 #include <qpdf/QPDFExc.hh>
... ... @@ -2761,12 +2762,12 @@ QPDF::pipeStreamData(
2761 2762  
2762 2763 bool attempted_finish = false;
2763 2764 try {
2764   - file->seek(offset, SEEK_SET);
2765   - auto buf = std::make_unique<char[]>(length);
2766   - if (auto read = file->read(buf.get(), length); read != length) {
2767   - throw damagedPDF(*file, "", offset + toO(read), "unexpected EOF reading stream data");
  2765 + auto buf = file->read(length, offset);
  2766 + if (buf.size() != length) {
  2767 + throw damagedPDF(
  2768 + *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data");
2768 2769 }
2769   - pipeline->write(buf.get(), length);
  2770 + pipeline->write(buf.data(), length);
2770 2771 attempted_finish = true;
2771 2772 pipeline->finish();
2772 2773 return true;
... ...
libqpdf/QPDFTokenizer.cc
... ... @@ -3,6 +3,7 @@
3 3 // DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of
4 4 // including it in case it may accidentally be used.
5 5  
  6 +#include <qpdf/InputSource_private.hh>
6 7 #include <qpdf/QIntC.hh>
7 8 #include <qpdf/QPDFExc.hh>
8 9 #include <qpdf/QPDFObjectHandle.hh>
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -4,6 +4,7 @@
4 4  
5 5 #include <qpdf/BitStream.hh>
6 6 #include <qpdf/BitWriter.hh>
  7 +#include <qpdf/InputSource_private.hh>
7 8 #include <qpdf/Pl_Buffer.hh>
8 9 #include <qpdf/Pl_Count.hh>
9 10 #include <qpdf/Pl_Flate.hh>
... ... @@ -19,6 +20,7 @@
19 20 #include <cstring>
20 21  
21 22 using namespace qpdf;
  23 +using namespace std::literals;
22 24  
23 25 template <class T, class int_type>
24 26 static void
... ... @@ -96,68 +98,52 @@ QPDF::isLinearized()
96 98  
97 99 // The PDF spec says the linearization dictionary must be completely contained within the first
98 100 // 1024 bytes of the file, so only that much is read and searched.
99   - static int const tbuf_size = 1025;
100   -
101   - auto b = std::make_unique<char[]>(tbuf_size);
102   - char* buf = b.get();
103   - m->file->seek(0, SEEK_SET);
104   - memset(buf, '\0', tbuf_size);
105   - m->file->read(buf, tbuf_size - 1);
106   -
107   - int lindict_obj = -1;
108   - char* p = buf;
109   - while (lindict_obj == -1) {
  101 + auto buffer = m->file->read(1024, 0);
  102 + size_t pos = 0;
  103 + while (true) {
110 104 // Find a digit or end of buffer
111   - while (((p - buf) < tbuf_size) && (!util::is_digit(*p))) {
112   - ++p;
113   - }
114   - if (p - buf == tbuf_size) {
115   - break;
  105 + pos = buffer.find_first_of("0123456789"sv, pos);
  106 + if (pos == std::string::npos) {
  107 + return false;
116 108 }
117 109 // Seek to the digit. Then skip over digits for a potential
118 110 // next iteration.
119   - m->file->seek(p - buf, SEEK_SET);
120   - while (((p - buf) < tbuf_size) && util::is_digit(*p)) {
121   - ++p;
  111 + m->file->seek(toO(pos), SEEK_SET);
  112 +
  113 + auto t1 = readToken(*m->file, 20);
  114 + if (!(t1.isInteger() && readToken(*m->file, 6).isInteger() &&
  115 + readToken(*m->file, 4).isWord("obj"))) {
  116 + pos = buffer.find_first_not_of("0123456789"sv, pos);
  117 + if (pos == std::string::npos) {
  118 + return false;
  119 + }
  120 + continue;
122 121 }
123 122  
124   - QPDFTokenizer::Token t1 = readToken(*m->file);
125   - if (t1.isInteger() && readToken(*m->file).isInteger() &&
126   - readToken(*m->file).isWord("obj") &&
127   - readToken(*m->file).getType() == QPDFTokenizer::tt_dict_open) {
128   - lindict_obj = toI(QUtil::string_to_ll(t1.getValue().c_str()));
  123 + auto candidate = getObject(toI(QUtil::string_to_ll(t1.getValue().data())), 0);
  124 + if (!candidate.isDictionary()) {
  125 + return false;
129 126 }
130   - }
131   -
132   - if (lindict_obj <= 0) {
133   - return false;
134   - }
135 127  
136   - auto candidate = getObjectByID(lindict_obj, 0);
137   - if (!candidate.isDictionary()) {
138   - return false;
139   - }
140   -
141   - QPDFObjectHandle linkey = candidate.getKey("/Linearized");
142   - if (!(linkey.isNumber() && (toI(floor(linkey.getNumericValue())) == 1))) {
143   - return false;
144   - }
  128 + auto linkey = candidate.getKey("/Linearized");
  129 + if (!(linkey.isNumber() && toI(floor(linkey.getNumericValue())) == 1)) {
  130 + return false;
  131 + }
145 132  
146   - QPDFObjectHandle L = candidate.getKey("/L");
147   - if (L.isInteger()) {
  133 + auto L = candidate.getKey("/L");
  134 + if (!L.isInteger()) {
  135 + return false;
  136 + }
148 137 qpdf_offset_t Li = L.getIntValue();
149 138 m->file->seek(0, SEEK_END);
150 139 if (Li != m->file->tell()) {
151 140 QTC::TC("qpdf", "QPDF /L mismatch");
152 141 return false;
153   - } else {
154   - m->linp.file_size = Li;
155 142 }
  143 + m->linp.file_size = Li;
  144 + m->lindict = candidate;
  145 + return true;
156 146 }
157   -
158   - m->lindict = candidate;
159   -
160   - return true;
161 147 }
162 148  
163 149 void
... ... @@ -548,7 +534,7 @@ QPDF::maxEnd(ObjUser const& ou)
548 534 }
549 535 qpdf_offset_t end = 0;
550 536 for (auto const& og: m->obj_user_to_objects[ou]) {
551   - if (m->obj_cache.count(og) == 0) {
  537 + if (!m->obj_cache.count(og)) {
552 538 stopOnError("unknown object referenced in object user table");
553 539 }
554 540 end = std::max(end, m->obj_cache[og].end_after_space);
... ...
libqpdf/qpdf/InputSource_private.hh 0 → 100644
  1 +#ifndef QPDF_INPUTSOURCE_PRIVATE_HH
  2 +#define QPDF_INPUTSOURCE_PRIVATE_HH
  3 +
  4 +#include <qpdf/InputSource.hh>
  5 +
  6 +inline size_t
  7 +InputSource::read(std::string& str, size_t count, qpdf_offset_t at)
  8 +{
  9 + if (at >= 0) { // a negative at skips the seek, so reading starts at the current position
  10 + seek(at, SEEK_SET);
  11 + }
  12 + str.resize(count); // make room for count bytes
  13 + str.resize(read(str.data(), count)); // then resize to the value returned by the char* read overload
  14 + return str.size(); // size of str after the read
  15 +}
  16 +
  17 +inline std::string
  18 +InputSource::read(size_t count, qpdf_offset_t at)
  19 +{
  20 + std::string result(count, '\0');
  21 + (void)read(result, count, at); // the returned size is discarded; result itself is resized by the callee
  22 + return result;
  23 +}
  24 +
  25 +inline void
  26 +InputSource::loadBuffer()
  27 +{
  28 + buf_idx = 0; // restart consumption at the beginning of the buffer
  29 + buf_len = qpdf_offset_t(read(buffer, buf_size)); // refill: request buf_size bytes, record how many were read
  30 + // NB read sets last_offset
  31 + buf_start = last_offset; // remember the offset that corresponds to buffer[0]
  32 +}
  33 +
  34 +inline qpdf_offset_t
  35 +InputSource::fastTell()
  36 +{
  37 + if (buf_len == 0) { // nothing buffered: fill the buffer
  38 + loadBuffer();
  39 + } else {
  40 + auto curr = tell();
  41 + if (curr < buf_start || curr >= (buf_start + buf_len)) { // current position is outside the buffered range
  42 + loadBuffer();
  43 + } else { // position is inside the buffered range: keep the data, just re-sync the bookkeeping
  44 + last_offset = curr;
  45 + buf_idx = curr - buf_start;
  46 + }
  47 + }
  48 + return last_offset;
  49 +}
  50 +
  51 +inline bool
  52 +InputSource::fastRead(char& ch)
  53 +{
  54 + // Before calling fastRead, fastTell must be called to prepare the buffer. Once reading is
  55 + // complete, fastUnread must be called to set the correct file position.
  56 + if (buf_idx < buf_len) { // unread bytes remain in the buffer
  57 + ch = buffer[buf_idx];
  58 + ++(buf_idx);
  59 + ++(last_offset);
  60 + return true;
  61 +
  62 + } else if (buf_len == 0) { // the buffer is empty: report that no character was read
  63 + return false;
  64 + } else { // buffer used up: seek to the end of the buffered range, refill via fastTell, retry
  65 + seek(buf_start + buf_len, SEEK_SET);
  66 + fastTell();
  67 + return fastRead(ch);
  68 + }
  69 +}
  70 +
  71 +inline void
  72 +InputSource::fastUnread(bool back)
  73 +{
  74 + last_offset -= back ? 1 : 0; // step back one byte only when back is true
  75 + seek(last_offset, SEEK_SET); // move the source to last_offset
  76 +}
  77 +
  78 +#endif // QPDF_INPUTSOURCE_PRIVATE_HH
... ...
manual/qpdf.1
... ... @@ -3,7 +3,7 @@
3 3 .\" Edits will be automatically overwritten if the build is
4 4 .\" run in maintainer mode.
5 5 .\"
6   -.TH QPDF "1" "" "qpdf version 12.0.0" "User Commands"
  6 +.TH QPDF "1" "" "qpdf version 12.0.1" "User Commands"
7 7 .SH NAME
8 8 qpdf \- PDF transformation software
9 9 .SH SYNOPSIS
... ...
manual/release-notes.rst
... ... @@ -13,12 +13,20 @@ more detail.
13 13  
14 14 .. x.y.z: not yet released
15 15  
  16 +12.0.1: not yet released
  17 + - Bug fixes
  18 +
  19 + - In ``QPDF::isLinearized`` return false if the first object in the file is
  20 + not a linearization parameter dictionary or its ``/L`` entry is not an
  21 + integer object. Previously the method returned false if the first
  22 + dictionary object was not a linearization parameter dictionary.
  23 +
16 24 .. _r12-0-0:
17 25  
18 26 .. cSpell:ignore substract
19 27  
20 28 12.0.0: March 9, 2025
21   - - API: breaking changes
  29 + - API breaking changes
22 30  
23 31 - The header file ``qpdf/QPDFObject.hh`` now generates an error if
24 32 included. This is to prevent code that includes it from
... ...