Commit 9f444ffef3c11201d0a460b14b6234d3319ce861

Authored by Jay Berkenbilt
1 parent 359999a5

add QPDF::processMemoryFile and API additions to support it

git-svn-id: svn+q:///qpdf/trunk@1034 71b93d88-0707-0410-a8cf-f5a4172ac649
ChangeLog
  1 +2010-10-01 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * include/qpdf/QPDF.hh: Add processMemoryFile method for
  4 + processing a PDF file from a memory buffer instead of a file.
  5 +
1 2010-09-24 Jay Berkenbilt <ejb@ql.org> 6 2010-09-24 Jay Berkenbilt <ejb@ql.org>
2 7
3 * libqpdf/QPDF.cc: change private "file" method to be a 8 * libqpdf/QPDF.cc: change private "file" method to be a
1 -2.2.1  
2 -=====  
3 -  
4 -Add interface for working with in-memory PDF files. Here's some code  
5 -to work with.  
6 -  
7 - Pl_Buffer b("b");  
8 - FILE* f = QUtil::fopen_wrapper(std::string("open ") + filename,  
9 - fopen(filename, "rb"));  
10 - unsigned char buf[1024];  
11 - size_t bytes_read = 0;  
12 - while (true)  
13 - {  
14 - size_t len = fread(buf, 1, sizeof(buf), f);  
15 - if (len == 0)  
16 - {  
17 - if (ferror(f))  
18 - {  
19 - throw QPDFExc(qpdf_e_system,  
20 - filename, "",  
21 - 0,  
22 - "read");  
23 - }  
24 - else  
25 - {  
26 - b.finish();  
27 - break;  
28 - }  
29 - }  
30 - else  
31 - {  
32 - b.write(buf, len);  
33 - bytes_read += len;  
34 - }  
35 - }  
36 - fclose(f);  
37 - this->file = new BufferInputSource(filename, b.getBuffer());  
38 -  
39 General 1 General
40 ======= 2 =======
41 3
include/qpdf/Buffer.hh
@@ -15,8 +15,17 @@ class Buffer @@ -15,8 +15,17 @@ class Buffer
15 public: 15 public:
16 QPDF_DLL 16 QPDF_DLL
17 Buffer(); 17 Buffer();
  18 +
  19 + // Create a Buffer object whose memory is owned by the class and
  20 + // will be freed when the Buffer object is destroyed.
18 QPDF_DLL 21 QPDF_DLL
19 Buffer(unsigned long size); 22 Buffer(unsigned long size);
  23 +
  24 + // Create a Buffer object whose memory is owned by the caller and
  25 + // will not be freed when the Buffer is destroyed.
  26 + QPDF_DLL
  27 + Buffer(unsigned char* buf, unsigned long size);
  28 +
20 QPDF_DLL 29 QPDF_DLL
21 Buffer(Buffer const&); 30 Buffer(Buffer const&);
22 QPDF_DLL 31 QPDF_DLL
@@ -31,10 +40,11 @@ class Buffer @@ -31,10 +40,11 @@ class Buffer
31 unsigned char* getBuffer(); 40 unsigned char* getBuffer();
32 41
33 private: 42 private:
34 - void init(unsigned long size); 43 + void init(unsigned long size, unsigned char* buf, bool own_memory);
35 void copy(Buffer const&); 44 void copy(Buffer const&);
36 void destroy(); 45 void destroy();
37 46
  47 + bool own_memory;
38 unsigned long size; 48 unsigned long size;
39 unsigned char* buf; 49 unsigned char* buf;
40 }; 50 };
include/qpdf/QPDF.hh
@@ -50,6 +50,15 @@ class QPDF @@ -50,6 +50,15 @@ class QPDF
50 QPDF_DLL 50 QPDF_DLL
51 void processFile(char const* filename, char const* password = 0); 51 void processFile(char const* filename, char const* password = 0);
52 52
  53 + // Parse a PDF file loaded into a memory buffer. This works
  54 + // exactly like processFile except that the PDF file is in memory
  55 + // instead of on disk. The description appears in any warning or
  56 + // error message in place of the file name.
  57 + QPDF_DLL
  58 + void processMemoryFile(char const* description,
  59 + char const* buf, size_t length,
  60 + char const* password = 0);
  61 +
53 // Parameter settings 62 // Parameter settings
54 63
55 // If true, ignore any cross-reference streams in a hybrid file 64 // If true, ignore any cross-reference streams in a hybrid file
@@ -362,7 +371,8 @@ class QPDF @@ -362,7 +371,8 @@ class QPDF
362 class BufferInputSource: public InputSource 371 class BufferInputSource: public InputSource
363 { 372 {
364 public: 373 public:
365 - BufferInputSource(std::string const& description, Buffer* buf); 374 + BufferInputSource(std::string const& description, Buffer* buf,
  375 + bool own_memory = false);
366 virtual ~BufferInputSource(); 376 virtual ~BufferInputSource();
367 virtual std::string const& getName() const; 377 virtual std::string const& getName() const;
368 virtual off_t tell(); 378 virtual off_t tell();
@@ -372,6 +382,7 @@ class QPDF @@ -372,6 +382,7 @@ class QPDF
372 virtual void unreadCh(char ch); 382 virtual void unreadCh(char ch);
373 383
374 private: 384 private:
  385 + bool own_memory;
375 std::string description; 386 std::string description;
376 Buffer* buf; 387 Buffer* buf;
377 off_t cur_offset; 388 off_t cur_offset;
@@ -410,7 +421,7 @@ class QPDF @@ -410,7 +421,7 @@ class QPDF
410 off_t end_after_space; 421 off_t end_after_space;
411 }; 422 };
412 423
413 - void parse(); 424 + void parse(char const* password);
414 void warn(QPDFExc const& e); 425 void warn(QPDFExc const& e);
415 void setTrailer(QPDFObjectHandle obj); 426 void setTrailer(QPDFObjectHandle obj);
416 void read_xref(off_t offset); 427 void read_xref(off_t offset);
libqpdf/Buffer.cc
@@ -4,17 +4,22 @@ @@ -4,17 +4,22 @@
4 4
5 Buffer::Buffer() 5 Buffer::Buffer()
6 { 6 {
7 - init(0); 7 + init(0, 0, true);
8 } 8 }
9 9
10 Buffer::Buffer(unsigned long size) 10 Buffer::Buffer(unsigned long size)
11 { 11 {
12 - init(size); 12 + init(size, 0, true);
  13 +}
  14 +
  15 +Buffer::Buffer(unsigned char* buf, unsigned long size)
  16 +{
  17 + init(size, buf, false);
13 } 18 }
14 19
15 Buffer::Buffer(Buffer const& rhs) 20 Buffer::Buffer(Buffer const& rhs)
16 { 21 {
17 - init(0); 22 + init(0, 0, true);
18 copy(rhs); 23 copy(rhs);
19 } 24 }
20 25
@@ -31,10 +36,18 @@ Buffer::~Buffer() @@ -31,10 +36,18 @@ Buffer::~Buffer()
31 } 36 }
32 37
33 void 38 void
34 -Buffer::init(unsigned long size) 39 +Buffer::init(unsigned long size, unsigned char* buf, bool own_memory)
35 { 40 {
  41 + this->own_memory = own_memory;
36 this->size = size; 42 this->size = size;
37 - this->buf = (size ? new unsigned char[size] : 0); 43 + if (own_memory)
  44 + {
  45 + this->buf = (size ? new unsigned char[size] : 0);
  46 + }
  47 + else
  48 + {
  49 + this->buf = buf;
  50 + }
38 } 51 }
39 52
40 void 53 void
@@ -43,7 +56,7 @@ Buffer::copy(Buffer const& rhs) @@ -43,7 +56,7 @@ Buffer::copy(Buffer const& rhs)
43 if (this != &rhs) 56 if (this != &rhs)
44 { 57 {
45 this->destroy(); 58 this->destroy();
46 - this->init(rhs.size); 59 + this->init(rhs.size, 0, true);
47 if (this->size) 60 if (this->size)
48 { 61 {
49 memcpy(this->buf, rhs.buf, this->size); 62 memcpy(this->buf, rhs.buf, this->size);
@@ -54,7 +67,10 @@ Buffer::copy(Buffer const& rhs) @@ -54,7 +67,10 @@ Buffer::copy(Buffer const& rhs)
54 void 67 void
55 Buffer::destroy() 68 Buffer::destroy()
56 { 69 {
57 - delete [] this->buf; 70 + if (this->own_memory)
  71 + {
  72 + delete [] this->buf;
  73 + }
58 this->size = 0; 74 this->size = 0;
59 this->buf = 0; 75 this->buf = 0;
60 } 76 }
libqpdf/QPDF.cc
@@ -159,7 +159,8 @@ QPDF::FileInputSource::unreadCh(char ch) @@ -159,7 +159,8 @@ QPDF::FileInputSource::unreadCh(char ch)
159 } 159 }
160 160
161 QPDF::BufferInputSource::BufferInputSource(std::string const& description, 161 QPDF::BufferInputSource::BufferInputSource(std::string const& description,
162 - Buffer* buf) : 162 + Buffer* buf, bool own_memory) :
  163 + own_memory(own_memory),
163 description(description), 164 description(description),
164 buf(buf), 165 buf(buf),
165 cur_offset(0) 166 cur_offset(0)
@@ -168,6 +169,10 @@ QPDF::BufferInputSource::BufferInputSource(std::string const& description, @@ -168,6 +169,10 @@ QPDF::BufferInputSource::BufferInputSource(std::string const& description,
168 169
169 QPDF::BufferInputSource::~BufferInputSource() 170 QPDF::BufferInputSource::~BufferInputSource()
170 { 171 {
  172 + if (own_memory)
  173 + {
  174 + delete this->buf;
  175 + }
171 } 176 }
172 177
173 std::string const& 178 std::string const&
@@ -192,7 +197,7 @@ QPDF::BufferInputSource::seek(off_t offset, int whence) @@ -192,7 +197,7 @@ QPDF::BufferInputSource::seek(off_t offset, int whence)
192 break; 197 break;
193 198
194 case SEEK_END: 199 case SEEK_END:
195 - this->cur_offset = this->buf->getSize() - offset; 200 + this->cur_offset = this->buf->getSize() + offset;
196 break; 201 break;
197 202
198 case SEEK_CUR: 203 case SEEK_CUR:
@@ -306,11 +311,19 @@ QPDF::processFile(char const* filename, char const* password) @@ -306,11 +311,19 @@ QPDF::processFile(char const* filename, char const* password)
306 FileInputSource* fi = new FileInputSource(); 311 FileInputSource* fi = new FileInputSource();
307 this->file = fi; 312 this->file = fi;
308 fi->setFilename(filename); 313 fi->setFilename(filename);
309 - if (password)  
310 - {  
311 - this->provided_password = password;  
312 - }  
313 - parse(); 314 + parse(password);
  315 +}
  316 +
  317 +void
  318 +QPDF::processMemoryFile(char const* description,
  319 + char const* buf, size_t length,
  320 + char const* password)
  321 +{
  322 + this->file =
  323 + new BufferInputSource(description,
  324 + new Buffer((unsigned char*)buf, length),
  325 + true);
  326 + parse(password);
314 } 327 }
315 328
316 void 329 void
@@ -340,11 +353,16 @@ QPDF::getWarnings() @@ -340,11 +353,16 @@ QPDF::getWarnings()
340 } 353 }
341 354
342 void 355 void
343 -QPDF::parse() 356 +QPDF::parse(char const* password)
344 { 357 {
345 static PCRE header_re("^%PDF-(1.\\d+)\\b"); 358 static PCRE header_re("^%PDF-(1.\\d+)\\b");
346 static PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); 359 static PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
347 360
  361 + if (password)
  362 + {
  363 + this->provided_password = password;
  364 + }
  365 +
348 std::string line = this->file->readLine(); 366 std::string line = this->file->readLine();
349 PCRE::Match m1 = header_re.match(line.c_str()); 367 PCRE::Match m1 = header_re.match(line.c_str());
350 if (m1) 368 if (m1)
qpdf/test_driver.cc
@@ -58,11 +58,53 @@ class Provider: public QPDFObjectHandle::StreamDataProvider @@ -58,11 +58,53 @@ class Provider: public QPDFObjectHandle::StreamDataProvider
58 void runtest(int n, char const* filename) 58 void runtest(int n, char const* filename)
59 { 59 {
60 QPDF pdf; 60 QPDF pdf;
  61 + PointerHolder<char> file_buf;
61 if (n == 0) 62 if (n == 0)
62 { 63 {
63 pdf.setAttemptRecovery(false); 64 pdf.setAttemptRecovery(false);
64 } 65 }
65 - pdf.processFile(filename); 66 + if (n % 2 == 0)
  67 + {
  68 + pdf.processFile(filename);
  69 + }
  70 + else
  71 + {
  72 + // Exercise processMemoryFile
  73 + FILE* f = QUtil::fopen_wrapper(std::string("open ") + filename,
  74 + fopen(filename, "rb"));
  75 + fseek(f, 0, SEEK_END);
  76 + size_t size = (size_t) ftell(f);
  77 + fseek(f, 0, SEEK_SET);
  78 + file_buf = new char[size];
  79 + char* buf_p = file_buf.getPointer();
  80 + size_t bytes_read = 0;
  81 + size_t len = 0;
  82 + while ((len = fread(buf_p + bytes_read, 1, size - bytes_read, f)) > 0)
  83 + {
  84 + bytes_read += len;
  85 + }
  86 + if (bytes_read != size)
  87 + {
  88 + if (ferror(f))
  89 + {
  90 + throw std::runtime_error(
  91 + std::string("failure reading file ") + filename +
  92 + " into memory: read " +
  93 + QUtil::int_to_string(bytes_read) + "; wanted " +
  94 + QUtil::int_to_string(size));
  95 + }
  96 + else
  97 + {
  98 + throw std::logic_error(
  99 + std::string("premature eof reading file ") + filename +
  100 + " into memory: read " +
  101 + QUtil::int_to_string(bytes_read) + "; wanted " +
  102 + QUtil::int_to_string(size));
  103 + }
  104 + }
  105 + fclose(f);
  106 + pdf.processMemoryFile(filename, buf_p, size);
  107 + }
66 108
67 if ((n == 0) || (n == 1)) 109 if ((n == 0) || (n == 1))
68 { 110 {