Commit 54e379d085866c4e95fc359792b96b6a6764211b

Authored by m-holger
Committed by GitHub
2 parents a4b87050 f54d98ba

Merge pull request #1305 from m-holger/is

Refactor InputSource
CMakeLists.txt
@@ -7,7 +7,7 @@ cmake_minimum_required(VERSION 3.16) @@ -7,7 +7,7 @@ cmake_minimum_required(VERSION 3.16)
7 # also find the version number here. generate_auto_job also reads the 7 # also find the version number here. generate_auto_job also reads the
8 # version from here. 8 # version from here.
9 project(qpdf 9 project(qpdf
10 - VERSION 12.0.0 10 + VERSION 12.0.1
11 LANGUAGES C CXX) 11 LANGUAGES C CXX)
12 12
13 # Enable correct rpath handling for MacOSX 13 # Enable correct rpath handling for MacOSX
include/qpdf/DLL.h
@@ -27,12 +27,12 @@ @@ -27,12 +27,12 @@
27 /* The first version of qpdf to include the version constants is 10.6.0. */ 27 /* The first version of qpdf to include the version constants is 10.6.0. */
28 #define QPDF_MAJOR_VERSION 12 28 #define QPDF_MAJOR_VERSION 12
29 #define QPDF_MINOR_VERSION 0 29 #define QPDF_MINOR_VERSION 0
30 -#define QPDF_PATCH_VERSION 0 30 +#define QPDF_PATCH_VERSION 1
31 31
32 #ifdef QPDF_FUTURE 32 #ifdef QPDF_FUTURE
33 -# define QPDF_VERSION "12.0.0+future" 33 +# define QPDF_VERSION "12.0.1+future"
34 #else 34 #else
35 -# define QPDF_VERSION "12.0.0" 35 +# define QPDF_VERSION "12.0.1"
36 #endif 36 #endif
37 37
38 /* 38 /*
include/qpdf/InputSource.hh
@@ -75,7 +75,11 @@ class QPDF_DLL_CLASS InputSource @@ -75,7 +75,11 @@ class QPDF_DLL_CLASS InputSource
75 // semantically equivalent to seek(-1, SEEK_CUR) but is much more efficient. 75 // semantically equivalent to seek(-1, SEEK_CUR) but is much more efficient.
76 virtual void unreadCh(char ch) = 0; 76 virtual void unreadCh(char ch) = 0;
77 77
78 - // The following methods are for use by QPDFTokenizer 78 + // The following methods are for internal use by qpdf only.
  79 + inline size_t read(std::string& str, size_t count, qpdf_offset_t at = -1);
  80 + inline std::string read(size_t count, qpdf_offset_t at = -1);
  81 + size_t read_line(std::string& str, size_t count, qpdf_offset_t at = -1);
  82 + std::string read_line(size_t count, qpdf_offset_t at = -1);
79 inline qpdf_offset_t fastTell(); 83 inline qpdf_offset_t fastTell();
80 inline bool fastRead(char&); 84 inline bool fastRead(char&);
81 inline void fastUnread(bool); 85 inline void fastUnread(bool);
@@ -93,57 +97,4 @@ class QPDF_DLL_CLASS InputSource @@ -93,57 +97,4 @@ class QPDF_DLL_CLASS InputSource
93 qpdf_offset_t buf_start = 0; 97 qpdf_offset_t buf_start = 0;
94 }; 98 };
95 99
96 -inline void  
97 -InputSource::loadBuffer()  
98 -{  
99 - this->buf_idx = 0;  
100 - this->buf_len = qpdf_offset_t(read(this->buffer, this->buf_size));  
101 - // NB read sets last_offset  
102 - this->buf_start = this->last_offset;  
103 -}  
104 -  
105 -inline qpdf_offset_t  
106 -InputSource::fastTell()  
107 -{  
108 - if (this->buf_len == 0) {  
109 - loadBuffer();  
110 - } else {  
111 - auto curr = tell();  
112 - if (curr < this->buf_start || curr >= (this->buf_start + this->buf_len)) {  
113 - loadBuffer();  
114 - } else {  
115 - this->last_offset = curr;  
116 - this->buf_idx = curr - this->buf_start;  
117 - }  
118 - }  
119 - return this->last_offset;  
120 -}  
121 -  
122 -inline bool  
123 -InputSource::fastRead(char& ch)  
124 -{  
125 - // Before calling fastRead, fastTell must be called to prepare the buffer. Once reading is  
126 - // complete, fastUnread must be called to set the correct file position.  
127 - if (this->buf_idx < this->buf_len) {  
128 - ch = this->buffer[this->buf_idx];  
129 - ++(this->buf_idx);  
130 - ++(this->last_offset);  
131 - return true;  
132 -  
133 - } else if (this->buf_len == 0) {  
134 - return false;  
135 - } else {  
136 - seek(this->buf_start + this->buf_len, SEEK_SET);  
137 - fastTell();  
138 - return fastRead(ch);  
139 - }  
140 -}  
141 -  
142 -inline void  
143 -InputSource::fastUnread(bool back)  
144 -{  
145 - this->last_offset -= back ? 1 : 0;  
146 - seek(this->last_offset, SEEK_SET);  
147 -}  
148 -  
149 #endif // QPDF_INPUTSOURCE_HH 100 #endif // QPDF_INPUTSOURCE_HH
job.sums
1 # Generated by generate_auto_job 1 # Generated by generate_auto_job
2 -CMakeLists.txt 88e8974a8b14e10c941a4bb04ff078c3d3063b98af3ea056e02b1dcdff783d22 2 +CMakeLists.txt 7779469688d17b58dfe69f2af4e5627eb20defd72a95ca71f2ecee68f1ec6d97
3 generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a86 3 generate_auto_job f64733b79dcee5a0e3e8ccc6976448e8ddf0e8b6529987a66a7d3ab2ebc10a86
4 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4 4 include/qpdf/auto_job_c_att.hh 4c2b171ea00531db54720bf49a43f8b34481586ae7fb6cbf225099ee42bc5bb4
5 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42 5 include/qpdf/auto_job_c_copy_att.hh 50609012bff14fd82f0649185940d617d05d530cdc522185c7f3920a561ccb42
@@ -16,5 +16,5 @@ libqpdf/qpdf/auto_job_json_init.hh 344c2fb473f88fe829c93b1efe6c70a0e4796537b8eb3 @@ -16,5 +16,5 @@ libqpdf/qpdf/auto_job_json_init.hh 344c2fb473f88fe829c93b1efe6c70a0e4796537b8eb3
16 libqpdf/qpdf/auto_job_schema.hh 6d3eef5137b8828eaa301a1b3cf75cb7bb812aa6e2d8301de865b42d238d7a7c 16 libqpdf/qpdf/auto_job_schema.hh 6d3eef5137b8828eaa301a1b3cf75cb7bb812aa6e2d8301de865b42d238d7a7c
17 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580 17 manual/_ext/qpdf.py 6add6321666031d55ed4aedf7c00e5662bba856dfcd66ccb526563bffefbb580
18 manual/cli.rst 67357688f9a52fafa9a4f231fe4ce74c3cd8977130da7501efe54439a1ee22d4 18 manual/cli.rst 67357688f9a52fafa9a4f231fe4ce74c3cd8977130da7501efe54439a1ee22d4
19 -manual/qpdf.1 78bad33f9b3f246f1800bce365f7be06d3545d89f08b8923dd8489031b5af43e 19 +manual/qpdf.1 dbcc567623f1fa080743ae9bc32b6264a3b6bd3074c81c438e52ca328e94ecd7
20 manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b 20 manual/qpdf.1.in 436ecc85d45c4c9e2dbd1725fb7f0177fb627179469f114561adf3cb6cbb677b
libqpdf/InputSource.cc
1 -#include <qpdf/InputSource.hh> 1 +#include <qpdf/InputSource_private.hh>
2 2
3 #include <qpdf/QIntC.hh> 3 #include <qpdf/QIntC.hh>
4 #include <qpdf/QTC.hh> 4 #include <qpdf/QTC.hh>
5 #include <cstring> 5 #include <cstring>
6 #include <stdexcept> 6 #include <stdexcept>
7 7
  8 +using namespace std::literals;
  9 +
8 void 10 void
9 InputSource::setLastOffset(qpdf_offset_t offset) 11 InputSource::setLastOffset(qpdf_offset_t offset)
10 { 12 {
@@ -17,27 +19,42 @@ InputSource::getLastOffset() const @@ -17,27 +19,42 @@ InputSource::getLastOffset() const
17 return this->last_offset; 19 return this->last_offset;
18 } 20 }
19 21
20 -std::string  
21 -InputSource::readLine(size_t max_line_length) 22 +size_t
  23 +InputSource::read_line(std::string& str, size_t count, qpdf_offset_t at)
22 { 24 {
23 // Return at most max_line_length characters from the next line. Lines are terminated by one or 25 // Return at most max_line_length characters from the next line. Lines are terminated by one or
24 // more \r or \n characters. Consume the trailing newline characters but don't return them. 26 // more \r or \n characters. Consume the trailing newline characters but don't return them.
25 // After this is called, the file will be positioned after a line terminator or at the end of 27 // After this is called, the file will be positioned after a line terminator or at the end of
26 // the file, and last_offset will point to position the file had when this method was called. 28 // the file, and last_offset will point to position the file had when this method was called.
27 29
28 - qpdf_offset_t offset = this->tell();  
29 - auto bp = std::make_unique<char[]>(max_line_length + 1);  
30 - char* buf = bp.get();  
31 - memset(buf, '\0', max_line_length + 1);  
32 - this->read(buf, max_line_length);  
33 - this->seek(offset, SEEK_SET);  
34 - qpdf_offset_t eol = this->findAndSkipNextEOL();  
35 - this->last_offset = offset;  
36 - size_t line_length = QIntC::to_size(eol - offset);  
37 - if (line_length < max_line_length) {  
38 - buf[line_length] = '\0'; 30 + read(str, count, at);
  31 + auto eol = str.find_first_of("\n\r"sv);
  32 + if (eol != std::string::npos) {
  33 + auto next_line = str.find_first_not_of("\n\r"sv, eol);
  34 + str.resize(eol);
  35 + if (next_line != std::string::npos) { // npos: the data read ends inside the newline run, so fall through and rescan
  36 + seek(last_offset + static_cast<qpdf_offset_t>(next_line), SEEK_SET);
  37 + return eol;
  38 + }
39 } 39 }
40 - return {buf}; 40 + // We did not necessarily find the end of the trailing newline sequence.
  41 + seek(last_offset, SEEK_SET);
  42 + findAndSkipNextEOL();
  43 + return eol; // npos when no line terminator was found in the data read
  44 +}
  45 +
  46 +std::string
  47 +InputSource::readLine(size_t max_line_length)
  48 +{
  49 + return read_line(max_line_length);
  50 +}
  51 +
  52 +std::string
  53 +InputSource::read_line(size_t count, qpdf_offset_t at)
  54 +{
  55 + std::string result(count, '\0');
  56 + read_line(result, count, at);
  57 + return result;
41 } 58 }
42 59
43 bool 60 bool
libqpdf/QPDF.cc
@@ -13,6 +13,7 @@ @@ -13,6 +13,7 @@
13 13
14 #include <qpdf/BufferInputSource.hh> 14 #include <qpdf/BufferInputSource.hh>
15 #include <qpdf/FileInputSource.hh> 15 #include <qpdf/FileInputSource.hh>
  16 +#include <qpdf/InputSource_private.hh>
16 #include <qpdf/OffsetInputSource.hh> 17 #include <qpdf/OffsetInputSource.hh>
17 #include <qpdf/Pipeline.hh> 18 #include <qpdf/Pipeline.hh>
18 #include <qpdf/QPDFExc.hh> 19 #include <qpdf/QPDFExc.hh>
@@ -2761,12 +2762,12 @@ QPDF::pipeStreamData( @@ -2761,12 +2762,12 @@ QPDF::pipeStreamData(
2761 2762
2762 bool attempted_finish = false; 2763 bool attempted_finish = false;
2763 try { 2764 try {
2764 - file->seek(offset, SEEK_SET);  
2765 - auto buf = std::make_unique<char[]>(length);  
2766 - if (auto read = file->read(buf.get(), length); read != length) {  
2767 - throw damagedPDF(*file, "", offset + toO(read), "unexpected EOF reading stream data"); 2765 + auto buf = file->read(length, offset);
  2766 + if (buf.size() != length) {
  2767 + throw damagedPDF(
  2768 + *file, "", offset + toO(buf.size()), "unexpected EOF reading stream data");
2768 } 2769 }
2769 - pipeline->write(buf.get(), length); 2770 + pipeline->write(buf.data(), length);
2770 attempted_finish = true; 2771 attempted_finish = true;
2771 pipeline->finish(); 2772 pipeline->finish();
2772 return true; 2773 return true;
libqpdf/QPDFTokenizer.cc
@@ -3,6 +3,7 @@ @@ -3,6 +3,7 @@
3 // DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of 3 // DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of
4 // including it in case it may accidentally be used. 4 // including it in case it may accidentally be used.
5 5
  6 +#include <qpdf/InputSource_private.hh>
6 #include <qpdf/QIntC.hh> 7 #include <qpdf/QIntC.hh>
7 #include <qpdf/QPDFExc.hh> 8 #include <qpdf/QPDFExc.hh>
8 #include <qpdf/QPDFObjectHandle.hh> 9 #include <qpdf/QPDFObjectHandle.hh>
libqpdf/QPDF_linearization.cc
@@ -4,6 +4,7 @@ @@ -4,6 +4,7 @@
4 4
5 #include <qpdf/BitStream.hh> 5 #include <qpdf/BitStream.hh>
6 #include <qpdf/BitWriter.hh> 6 #include <qpdf/BitWriter.hh>
  7 +#include <qpdf/InputSource_private.hh>
7 #include <qpdf/Pl_Buffer.hh> 8 #include <qpdf/Pl_Buffer.hh>
8 #include <qpdf/Pl_Count.hh> 9 #include <qpdf/Pl_Count.hh>
9 #include <qpdf/Pl_Flate.hh> 10 #include <qpdf/Pl_Flate.hh>
@@ -19,6 +20,7 @@ @@ -19,6 +20,7 @@
19 #include <cstring> 20 #include <cstring>
20 21
21 using namespace qpdf; 22 using namespace qpdf;
  23 +using namespace std::literals;
22 24
23 template <class T, class int_type> 25 template <class T, class int_type>
24 static void 26 static void
@@ -96,68 +98,52 @@ QPDF::isLinearized() @@ -96,68 +98,52 @@ QPDF::isLinearized()
96 98
97 // The PDF spec says the linearization dictionary must be completely contained within the first 99 // The PDF spec says the linearization dictionary must be completely contained within the first
98 // 1024 bytes of the file. Add a byte for a null terminator. 100 // 1024 bytes of the file. Add a byte for a null terminator.
99 - static int const tbuf_size = 1025;  
100 -  
101 - auto b = std::make_unique<char[]>(tbuf_size);  
102 - char* buf = b.get();  
103 - m->file->seek(0, SEEK_SET);  
104 - memset(buf, '\0', tbuf_size);  
105 - m->file->read(buf, tbuf_size - 1);  
106 -  
107 - int lindict_obj = -1;  
108 - char* p = buf;  
109 - while (lindict_obj == -1) { 101 + auto buffer = m->file->read(1024, 0);
  102 + size_t pos = 0;
  103 + while (true) {
110 // Find a digit or end of buffer 104 // Find a digit or end of buffer
111 - while (((p - buf) < tbuf_size) && (!util::is_digit(*p))) {  
112 - ++p;  
113 - }  
114 - if (p - buf == tbuf_size) {  
115 - break; 105 + pos = buffer.find_first_of("0123456789"sv, pos);
  106 + if (pos == std::string::npos) {
  107 + return false;
116 } 108 }
117 // Seek to the digit. Then skip over digits for a potential 109 // Seek to the digit. Then skip over digits for a potential
118 // next iteration. 110 // next iteration.
119 - m->file->seek(p - buf, SEEK_SET);  
120 - while (((p - buf) < tbuf_size) && util::is_digit(*p)) {  
121 - ++p; 111 + m->file->seek(toO(pos), SEEK_SET);
  112 +
  113 + auto t1 = readToken(*m->file, 20);
  114 + if (!(t1.isInteger() && readToken(*m->file, 6).isInteger() &&
  115 + readToken(*m->file, 4).isWord("obj"))) {
  116 + pos = buffer.find_first_not_of("0123456789"sv, pos);
  117 + if (pos == std::string::npos) {
  118 + return false;
  119 + }
  120 + continue;
122 } 121 }
123 122
124 - QPDFTokenizer::Token t1 = readToken(*m->file);  
125 - if (t1.isInteger() && readToken(*m->file).isInteger() &&  
126 - readToken(*m->file).isWord("obj") &&  
127 - readToken(*m->file).getType() == QPDFTokenizer::tt_dict_open) {  
128 - lindict_obj = toI(QUtil::string_to_ll(t1.getValue().c_str())); 123 + auto candidate = getObject(toI(QUtil::string_to_ll(t1.getValue().data())), 0);
  124 + if (!candidate.isDictionary()) {
  125 + return false;
129 } 126 }
130 - }  
131 -  
132 - if (lindict_obj <= 0) {  
133 - return false;  
134 - }  
135 127
136 - auto candidate = getObjectByID(lindict_obj, 0);  
137 - if (!candidate.isDictionary()) {  
138 - return false;  
139 - }  
140 -  
141 - QPDFObjectHandle linkey = candidate.getKey("/Linearized");  
142 - if (!(linkey.isNumber() && (toI(floor(linkey.getNumericValue())) == 1))) {  
143 - return false;  
144 - } 128 + auto linkey = candidate.getKey("/Linearized");
  129 + if (!(linkey.isNumber() && toI(floor(linkey.getNumericValue())) == 1)) {
  130 + return false;
  131 + }
145 132
146 - QPDFObjectHandle L = candidate.getKey("/L");  
147 - if (L.isInteger()) { 133 + auto L = candidate.getKey("/L");
  134 + if (!L.isInteger()) {
  135 + return false;
  136 + }
148 qpdf_offset_t Li = L.getIntValue(); 137 qpdf_offset_t Li = L.getIntValue();
149 m->file->seek(0, SEEK_END); 138 m->file->seek(0, SEEK_END);
150 if (Li != m->file->tell()) { 139 if (Li != m->file->tell()) {
151 QTC::TC("qpdf", "QPDF /L mismatch"); 140 QTC::TC("qpdf", "QPDF /L mismatch");
152 return false; 141 return false;
153 - } else {  
154 - m->linp.file_size = Li;  
155 } 142 }
  143 + m->linp.file_size = Li;
  144 + m->lindict = candidate;
  145 + return true;
156 } 146 }
157 -  
158 - m->lindict = candidate;  
159 -  
160 - return true;  
161 } 147 }
162 148
163 void 149 void
@@ -548,7 +534,7 @@ QPDF::maxEnd(ObjUser const& ou) @@ -548,7 +534,7 @@ QPDF::maxEnd(ObjUser const& ou)
548 } 534 }
549 qpdf_offset_t end = 0; 535 qpdf_offset_t end = 0;
550 for (auto const& og: m->obj_user_to_objects[ou]) { 536 for (auto const& og: m->obj_user_to_objects[ou]) {
551 - if (m->obj_cache.count(og) == 0) { 537 + if (!m->obj_cache.count(og)) {
552 stopOnError("unknown object referenced in object user table"); 538 stopOnError("unknown object referenced in object user table");
553 } 539 }
554 end = std::max(end, m->obj_cache[og].end_after_space); 540 end = std::max(end, m->obj_cache[og].end_after_space);
libqpdf/qpdf/InputSource_private.hh 0 → 100644
  1 +#ifndef QPDF_INPUTSOURCE_PRIVATE_HH
  2 +#define QPDF_INPUTSOURCE_PRIVATE_HH
  3 +
  4 +#include <qpdf/InputSource.hh>
  5 +
  6 +inline size_t
  7 +InputSource::read(std::string& str, size_t count, qpdf_offset_t at)
  8 +{ // Read up to `count` bytes into `str`, optionally seeking to `at` first; returns the resulting str.size().
  9 + if (at >= 0) { // a negative `at` skips the seek, so the read starts wherever the source already is
  10 + seek(at, SEEK_SET);
  11 + }
  12 + str.resize(count); // make room for up to `count` bytes
  13 + str.resize(read(str.data(), count)); // shrink to the length the raw read returned
  14 + return str.size();
  15 +}
  16 +
  17 +inline std::string
  18 +InputSource::read(size_t count, qpdf_offset_t at)
  19 +{ // Convenience form: returns the bytes read as a new string instead of filling a caller-supplied one.
  20 + std::string result(count, '\0');
  21 + (void)read(result, count, at); // the returned count is discarded; result.size() carries the same value
  22 + return result;
  23 +}
  24 +
  25 +inline void
  26 +InputSource::loadBuffer()
  27 +{ // Refill the internal buffer with a read of up to buf_size bytes and reset the buffer bookkeeping.
  28 + buf_idx = 0; // consumption restarts at the beginning of the buffer
  29 + buf_len = qpdf_offset_t(read(buffer, buf_size)); // number of bytes the read returned
  30 + // NB read sets last_offset
  31 + buf_start = last_offset; // the offset recorded by that read becomes the buffer's origin
  32 +}
  33 +
  34 +inline qpdf_offset_t
  35 +InputSource::fastTell()
  36 +{ // Make the buffer cover the current position (reloading it when needed) and return last_offset.
  37 + if (buf_len == 0) { // buffer is empty
  38 + loadBuffer();
  39 + } else {
  40 + auto curr = tell();
  41 + if (curr < buf_start || curr >= (buf_start + buf_len)) { // current position lies outside the buffered range
  42 + loadBuffer();
  43 + } else {
  44 + last_offset = curr;
  45 + buf_idx = curr - buf_start; // index of the current position within the buffer
  46 + }
  47 + }
  48 + return last_offset;
  49 +}
  50 +
  51 +inline bool
  52 +InputSource::fastRead(char& ch)
  53 +{ // Store the next buffered byte in `ch` and return true, or return false when the buffer is empty.
  54 + // Before calling fastRead, fastTell must be called to prepare the buffer. Once reading is
  55 + // complete, fastUnread must be called to set the correct file position.
  56 + if (buf_idx < buf_len) { // unread bytes remain in the buffer
  57 + ch = buffer[buf_idx];
  58 + ++(buf_idx);
  59 + ++(last_offset); // keep last_offset in step with the bytes handed out
  60 + return true;
  61 +
  62 + } else if (buf_len == 0) { // empty buffer: report failure and leave ch untouched
  63 + return false;
  64 + } else {
  65 + seek(buf_start + buf_len, SEEK_SET); // buffer exhausted: move to the first offset past it
  66 + fastTell(); // refreshes the buffer state for the new position
  67 + return fastRead(ch); // retry against the refreshed buffer
  68 + }
  69 +}
  70 +
  71 +inline void
  72 +InputSource::fastUnread(bool back)
  73 +{ // Finish a fastRead sequence by seeking the source to last_offset, optionally stepping back one byte.
  74 + last_offset -= back ? 1 : 0; // step back one byte only when `back` is true
  75 + seek(last_offset, SEEK_SET);
  76 +}
  77 +
  78 +#endif // QPDF_INPUTSOURCE_PRIVATE_HH
manual/qpdf.1
@@ -3,7 +3,7 @@ @@ -3,7 +3,7 @@
3 .\" Edits will be automatically overwritten if the build is 3 .\" Edits will be automatically overwritten if the build is
4 .\" run in maintainer mode. 4 .\" run in maintainer mode.
5 .\" 5 .\"
6 -.TH QPDF "1" "" "qpdf version 12.0.0" "User Commands" 6 +.TH QPDF "1" "" "qpdf version 12.0.1" "User Commands"
7 .SH NAME 7 .SH NAME
8 qpdf \- PDF transformation software 8 qpdf \- PDF transformation software
9 .SH SYNOPSIS 9 .SH SYNOPSIS
manual/release-notes.rst
@@ -13,12 +13,20 @@ more detail. @@ -13,12 +13,20 @@ more detail.
13 13
14 .. x.y.z: not yet released 14 .. x.y.z: not yet released
15 15
  16 +12.0.1: not yet released
  17 + - Bug fixes
  18 +
  19 + - In ``QPDF::isLinearized`` return false if the first object in the file is
  20 + not a linearization parameter dictionary or its ``/L`` entry is not an
  21 + integer object. Previously the method returned false if the first
  22 + dictionary object was not a linearization parameter dictionary.
  23 +
16 .. _r12-0-0: 24 .. _r12-0-0:
17 25
18 .. cSpell:ignore substract 26 .. cSpell:ignore substract
19 27
20 12.0.0: March 9, 2025 28 12.0.0: March 9, 2025
21 - - API: breaking changes 29 + - API breaking changes
22 30
23 - The header file ``qpdf/QPDFObject.hh`` now generates an error if 31 - The header file ``qpdf/QPDFObject.hh`` now generates an error if
24 included. This is to prevent code that includes it from 32 included. This is to prevent code that includes it from