Commit 4ccc8b1a44109a913c87f31029c9c17db30ecc43
1 parent
32ddcec9
Add ClosedFileInputSource
ClosedFileInputSource is an input source that keeps the file closed when not reading it.
Showing
9 changed files
with
279 additions
and
0 deletions
ChangeLog
| 1 | 2018-06-22 Jay Berkenbilt <ejb@ql.org> | 1 | 2018-06-22 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | + * Add ClosedFileInputSource class, an input source that keeps its | ||
| 4 | + input file closed when not reading it. At the expense of some | ||
| 5 | + performance, this allows you to operate on many files without | ||
| 6 | + opening too many files at the operating system level. | ||
| 7 | + | ||
| 3 | * Add new option --preserved-unreferenced-resources, which | 8 | * Add new option --preserved-unreferenced-resources, which |
| 4 | suppresses removal of unreferenced objects from page resource | 9 | suppresses removal of unreferenced objects from page resource |
| 5 | dictionaries during page splitting operations. | 10 | dictionaries during page splitting operations. |
include/qpdf/ClosedFileInputSource.hh
0 → 100644
| 1 | +// Copyright (c) 2005-2018 Jay Berkenbilt | ||
| 2 | +// | ||
| 3 | +// This file is part of qpdf. | ||
| 4 | +// | ||
| 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 6 | +// you may not use this file except in compliance with the License. | ||
| 7 | +// You may obtain a copy of the License at | ||
| 8 | +// | ||
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 | ||
| 10 | +// | ||
| 11 | +// Unless required by applicable law or agreed to in writing, software | ||
| 12 | +// distributed under the License is distributed on an "AS IS" BASIS, | ||
| 13 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 14 | +// See the License for the specific language governing permissions and | ||
| 15 | +// limitations under the License. | ||
| 16 | +// | ||
| 17 | +// Versions of qpdf prior to version 7 were released under the terms | ||
| 18 | +// of version 2.0 of the Artistic License. At your option, you may | ||
| 19 | +// continue to consider qpdf to be licensed under those terms. Please | ||
| 20 | +// see the manual for additional information. | ||
| 21 | + | ||
| 22 | +#ifndef __QPDF_CLOSEDFILEINPUTSOURCE_HH__ | ||
| 23 | +#define __QPDF_CLOSEDFILEINPUTSOURCE_HH__ | ||
| 24 | + | ||
| 25 | +// This is an input source that reads from files, like | ||
| 26 | +// FileInputSource, except that it opens and close the file | ||
| 27 | +// surrounding every operation. This decreases effiency, but it allows | ||
| 28 | +// many more of these to exist at once than the maximum number of open | ||
| 29 | +// file descriptors. This is used for merging large numbers of files. | ||
| 30 | + | ||
| 31 | +#include <qpdf/InputSource.hh> | ||
| 32 | +#include <qpdf/PointerHolder.hh> | ||
| 33 | + | ||
| 34 | +class FileInputSource; | ||
| 35 | + | ||
| 36 | +class ClosedFileInputSource: public InputSource | ||
| 37 | +{ | ||
| 38 | + public: | ||
| 39 | + QPDF_DLL | ||
| 40 | + ClosedFileInputSource(char const* filename); | ||
| 41 | + QPDF_DLL | ||
| 42 | + virtual ~ClosedFileInputSource(); | ||
| 43 | + QPDF_DLL | ||
| 44 | + virtual qpdf_offset_t findAndSkipNextEOL(); | ||
| 45 | + QPDF_DLL | ||
| 46 | + virtual std::string const& getName() const; | ||
| 47 | + QPDF_DLL | ||
| 48 | + virtual qpdf_offset_t tell(); | ||
| 49 | + QPDF_DLL | ||
| 50 | + virtual void seek(qpdf_offset_t offset, int whence); | ||
| 51 | + QPDF_DLL | ||
| 52 | + virtual void rewind(); | ||
| 53 | + QPDF_DLL | ||
| 54 | + virtual size_t read(char* buffer, size_t length); | ||
| 55 | + QPDF_DLL | ||
| 56 | + virtual void unreadCh(char ch); | ||
| 57 | + | ||
| 58 | + private: | ||
| 59 | + ClosedFileInputSource(ClosedFileInputSource const&); | ||
| 60 | + ClosedFileInputSource& operator=(ClosedFileInputSource const&); | ||
| 61 | + | ||
| 62 | + void before(); | ||
| 63 | + void after(); | ||
| 64 | + | ||
| 65 | + class Members | ||
| 66 | + { | ||
| 67 | + friend class ClosedFileInputSource; | ||
| 68 | + | ||
| 69 | + public: | ||
| 70 | + QPDF_DLL | ||
| 71 | + ~Members(); | ||
| 72 | + | ||
| 73 | + private: | ||
| 74 | + Members(char const* filename); | ||
| 75 | + | ||
| 76 | + std::string filename; | ||
| 77 | + qpdf_offset_t offset; | ||
| 78 | + FileInputSource* fis; | ||
| 79 | + }; | ||
| 80 | + PointerHolder<Members> m; | ||
| 81 | +}; | ||
| 82 | + | ||
| 83 | +#endif // __QPDF_CLOSEDFILEINPUTSOURCE_HH__ |
libqpdf/ClosedFileInputSource.cc
0 → 100644
| 1 | +#include <qpdf/ClosedFileInputSource.hh> | ||
| 2 | +#include <qpdf/FileInputSource.hh> | ||
| 3 | + | ||
| 4 | +ClosedFileInputSource::Members::Members(char const* filename) : | ||
| 5 | + filename(filename), | ||
| 6 | + offset(0), | ||
| 7 | + fis(0) | ||
| 8 | +{ | ||
| 9 | +} | ||
| 10 | + | ||
| 11 | +ClosedFileInputSource::Members::~Members() | ||
| 12 | +{ | ||
| 13 | + if (fis) | ||
| 14 | + { | ||
| 15 | + delete fis; | ||
| 16 | + } | ||
| 17 | +} | ||
| 18 | + | ||
| 19 | +ClosedFileInputSource::ClosedFileInputSource(char const* filename) : | ||
| 20 | + m(new Members(filename)) | ||
| 21 | +{ | ||
| 22 | +} | ||
| 23 | + | ||
| 24 | +ClosedFileInputSource::~ClosedFileInputSource() | ||
| 25 | +{ | ||
| 26 | +} | ||
| 27 | + | ||
| 28 | +void | ||
| 29 | +ClosedFileInputSource::before() | ||
| 30 | +{ | ||
| 31 | + if (0 == this->m->fis) | ||
| 32 | + { | ||
| 33 | + this->m->fis = new FileInputSource(); | ||
| 34 | + this->m->fis->setFilename(this->m->filename.c_str()); | ||
| 35 | + this->m->fis->seek(this->m->offset, SEEK_SET); | ||
| 36 | + this->m->fis->setLastOffset(this->last_offset); | ||
| 37 | + } | ||
| 38 | +} | ||
| 39 | + | ||
| 40 | +void | ||
| 41 | +ClosedFileInputSource::after() | ||
| 42 | +{ | ||
| 43 | + this->last_offset = this->m->fis->getLastOffset(); | ||
| 44 | + this->m->offset = this->m->fis->tell(); | ||
| 45 | + delete this->m->fis; | ||
| 46 | + this->m->fis = 0; | ||
| 47 | +} | ||
| 48 | + | ||
| 49 | +qpdf_offset_t | ||
| 50 | +ClosedFileInputSource::findAndSkipNextEOL() | ||
| 51 | +{ | ||
| 52 | + before(); | ||
| 53 | + qpdf_offset_t r = this->m->fis->findAndSkipNextEOL(); | ||
| 54 | + after(); | ||
| 55 | + return r; | ||
| 56 | +} | ||
| 57 | + | ||
| 58 | +std::string const& | ||
| 59 | +ClosedFileInputSource::getName() const | ||
| 60 | +{ | ||
| 61 | + return this->m->filename; | ||
| 62 | +} | ||
| 63 | + | ||
| 64 | +qpdf_offset_t | ||
| 65 | +ClosedFileInputSource::tell() | ||
| 66 | +{ | ||
| 67 | + before(); | ||
| 68 | + qpdf_offset_t r = this->m->fis->tell(); | ||
| 69 | + after(); | ||
| 70 | + return r; | ||
| 71 | +} | ||
| 72 | + | ||
| 73 | +void | ||
| 74 | +ClosedFileInputSource::seek(qpdf_offset_t offset, int whence) | ||
| 75 | +{ | ||
| 76 | + before(); | ||
| 77 | + this->m->fis->seek(offset, whence); | ||
| 78 | + after(); | ||
| 79 | +} | ||
| 80 | + | ||
| 81 | +void | ||
| 82 | +ClosedFileInputSource::rewind() | ||
| 83 | +{ | ||
| 84 | + this->m->offset = 0; | ||
| 85 | +} | ||
| 86 | + | ||
| 87 | +size_t | ||
| 88 | +ClosedFileInputSource::read(char* buffer, size_t length) | ||
| 89 | +{ | ||
| 90 | + before(); | ||
| 91 | + size_t r = this->m->fis->read(buffer, length); | ||
| 92 | + after(); | ||
| 93 | + return r; | ||
| 94 | +} | ||
| 95 | + | ||
| 96 | +void | ||
| 97 | +ClosedFileInputSource::unreadCh(char ch) | ||
| 98 | +{ | ||
| 99 | + before(); | ||
| 100 | + this->m->fis->unreadCh(ch); | ||
| 101 | + // Don't call after -- the file has to stay open after this | ||
| 102 | + // operation. | ||
| 103 | +} |
libqpdf/build.mk
| @@ -9,6 +9,7 @@ SRCS_libqpdf = \ | @@ -9,6 +9,7 @@ SRCS_libqpdf = \ | ||
| 9 | libqpdf/BitWriter.cc \ | 9 | libqpdf/BitWriter.cc \ |
| 10 | libqpdf/Buffer.cc \ | 10 | libqpdf/Buffer.cc \ |
| 11 | libqpdf/BufferInputSource.cc \ | 11 | libqpdf/BufferInputSource.cc \ |
| 12 | + libqpdf/ClosedFileInputSource.cc \ | ||
| 12 | libqpdf/ContentNormalizer.cc \ | 13 | libqpdf/ContentNormalizer.cc \ |
| 13 | libqpdf/FileInputSource.cc \ | 14 | libqpdf/FileInputSource.cc \ |
| 14 | libqpdf/InputSource.cc \ | 15 | libqpdf/InputSource.cc \ |
libtests/build.mk
libtests/closed_file_input_source.cc
0 → 100644
| 1 | +#include <qpdf/ClosedFileInputSource.hh> | ||
| 2 | +#include <qpdf/FileInputSource.hh> | ||
| 3 | + | ||
| 4 | +#include <stdio.h> | ||
| 5 | +#include <string.h> | ||
| 6 | +#include <iostream> | ||
| 7 | +#include <stdlib.h> | ||
| 8 | + | ||
| 9 | +void check(std::string const& what, bool result) | ||
| 10 | +{ | ||
| 11 | + if (! result) | ||
| 12 | + { | ||
| 13 | + std::cout << "FAIL: " << what << std::endl; | ||
| 14 | + } | ||
| 15 | +} | ||
| 16 | + | ||
| 17 | +void do_tests(InputSource* is) | ||
| 18 | +{ | ||
| 19 | + check("get name", "input" == is->getName()); | ||
| 20 | + check("initial tell", 0 == is->tell()); | ||
| 21 | + is->seek(11, SEEK_SET); | ||
| 22 | + check("tell after SEEK_SET", 11 == is->tell()); | ||
| 23 | + check("read offset 11", "Offset 11" == is->readLine(100)); | ||
| 24 | + check("last offset after read 11", 11 == is->getLastOffset()); | ||
| 25 | + check("tell after read", 21 == is->tell()); | ||
| 26 | + is->findAndSkipNextEOL(); | ||
| 27 | + check("tell after findAndSkipNextEOL", 522 == is->tell()); | ||
| 28 | + is->unreadCh('Q'); | ||
| 29 | + char b[1]; | ||
| 30 | + b[0] = '\0'; | ||
| 31 | + check("read unread character", 1 == is->read(b, 1)); | ||
| 32 | + check("last offset after read unread", 521 == is->getLastOffset()); | ||
| 33 | + check("got character", 'Q' == b[0]); | ||
| 34 | + is->seek(0, SEEK_END); | ||
| 35 | + check("tell at end", 556 == is->tell()); | ||
| 36 | + is->seek(-25, SEEK_END); | ||
| 37 | + check("tell before end", 531 == is->tell()); | ||
| 38 | + check("last offset unchanged after seek", 521 == is->getLastOffset()); | ||
| 39 | + is->seek(-9, SEEK_CUR); | ||
| 40 | + check("tell after SEEK_CUR", 522 == is->tell()); | ||
| 41 | + check("read offset 522", "9 before" == is->readLine(100)); | ||
| 42 | + check("last offset after read", 522 == is->getLastOffset()); | ||
| 43 | + is->rewind(); | ||
| 44 | + check("last offset unchanged after rewind", 522 == is->getLastOffset()); | ||
| 45 | + check("tell after rewind", 0 == is->tell()); | ||
| 46 | + check("read offset at beginning", "!00000000?" == is->readLine(100)); | ||
| 47 | + check("last offset after read 0", 0 == is->getLastOffset()); | ||
| 48 | +} | ||
| 49 | + | ||
| 50 | +int main() | ||
| 51 | +{ | ||
| 52 | + // This test is designed to work with a specified input file. | ||
| 53 | + std::cout << "testing with ClosedFileInputSource\n"; | ||
| 54 | + ClosedFileInputSource cf("input"); | ||
| 55 | + do_tests(&cf); | ||
| 56 | + std::cout << "testing with FileInputSource\n"; | ||
| 57 | + FileInputSource f; | ||
| 58 | + f.setFilename("input"); | ||
| 59 | + do_tests(&f); | ||
| 60 | + std::cout << "all assertions passed" << std::endl; | ||
| 61 | + return 0; | ||
| 62 | +} |
libtests/qtest/closedfile.test
0 → 100644
#!/usr/bin/env perl
require 5.008;
use warnings;
use strict;

# Run from the directory that holds this test's "input" and "output"
# files.
chdir("closedfile") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('closed_file_input_source');

# Run the test program and compare what it prints with the "output"
# file; it must also exit with status 0.
$td->runtest(
    "closed file input source",
    { $td->COMMAND => "closed_file_input_source" },
    { $td->FILE => "output", $td->EXIT_STATUS => 0 },
    $td->NORMALIZE_NEWLINES,
);

# NOTE(review): the argument is presumably the expected number of
# tests -- confirm against TestDriver.
$td->report(1);
libtests/qtest/closedfile/input
0 → 100644
| 1 | +!00000000? | ||
| 2 | +Offset 11 | ||
| 3 | +wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww | ||
| 4 | +9 before | ||
| 5 | +This is 25 from the end. |
libtests/qtest/closedfile/output
0 → 100644