Commit 4ccc8b1a44109a913c87f31029c9c17db30ecc43
1 parent
32ddcec9
Add ClosedFileInputSource
ClosedFileInputSource is an input source that keeps the file closed when not reading it.
Showing
9 changed files
with
279 additions
and
0 deletions
ChangeLog
| 1 | 1 | 2018-06-22 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | |
| 3 | + * Add ClosedFileInputSource class, an input source that keeps its | |
| 4 | + input file closed when not reading it. At the expense of some | |
| 5 | + performance, this allows you to operate on many files without | |
| 6 | + opening too many files at the operating system level. | |
| 7 | + | |
| 3 | 8 | * Add new option --preserve-unreferenced-resources, which |
| 4 | 9 | suppresses removal of unreferenced objects from page resource |
| 5 | 10 | dictionaries during page splitting operations. | ... | ... |
include/qpdf/ClosedFileInputSource.hh
0 → 100644
| 1 | +// Copyright (c) 2005-2018 Jay Berkenbilt | |
| 2 | +// | |
| 3 | +// This file is part of qpdf. | |
| 4 | +// | |
| 5 | +// Licensed under the Apache License, Version 2.0 (the "License"); | |
| 6 | +// you may not use this file except in compliance with the License. | |
| 7 | +// You may obtain a copy of the License at | |
| 8 | +// | |
| 9 | +// http://www.apache.org/licenses/LICENSE-2.0 | |
| 10 | +// | |
| 11 | +// Unless required by applicable law or agreed to in writing, software | |
| 12 | +// distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 | +// See the License for the specific language governing permissions and | |
| 15 | +// limitations under the License. | |
| 16 | +// | |
| 17 | +// Versions of qpdf prior to version 7 were released under the terms | |
| 18 | +// of version 2.0 of the Artistic License. At your option, you may | |
| 19 | +// continue to consider qpdf to be licensed under those terms. Please | |
| 20 | +// see the manual for additional information. | |
| 21 | + | |
| 22 | +#ifndef __QPDF_CLOSEDFILEINPUTSOURCE_HH__ | |
| 23 | +#define __QPDF_CLOSEDFILEINPUTSOURCE_HH__ | |
| 24 | + | |
| 25 | +// This is an input source that reads from files, like | |
| 26 | +// FileInputSource, except that it opens and closes the file | |
| 27 | +// surrounding every operation. This decreases efficiency, but it allows | |
| 28 | +// many more of these to exist at once than the maximum number of open | |
| 29 | +// file descriptors. This is used for merging large numbers of files. | |
| 30 | + | |
| 31 | +#include <qpdf/InputSource.hh> | |
| 32 | +#include <qpdf/PointerHolder.hh> | |
| 33 | + | |
| 34 | +class FileInputSource; | |
| 35 | + | |
| 36 | +class ClosedFileInputSource: public InputSource | |
| 37 | +{ | |
| 38 | + public: | |
| 39 | + QPDF_DLL | |
| 40 | + ClosedFileInputSource(char const* filename); | |
| 41 | + QPDF_DLL | |
| 42 | + virtual ~ClosedFileInputSource(); | |
| 43 | + QPDF_DLL | |
| 44 | + virtual qpdf_offset_t findAndSkipNextEOL(); | |
| 45 | + QPDF_DLL | |
| 46 | + virtual std::string const& getName() const; | |
| 47 | + QPDF_DLL | |
| 48 | + virtual qpdf_offset_t tell(); | |
| 49 | + QPDF_DLL | |
| 50 | + virtual void seek(qpdf_offset_t offset, int whence); | |
| 51 | + QPDF_DLL | |
| 52 | + virtual void rewind(); | |
| 53 | + QPDF_DLL | |
| 54 | + virtual size_t read(char* buffer, size_t length); | |
| 55 | + QPDF_DLL | |
| 56 | + virtual void unreadCh(char ch); | |
| 57 | + | |
| 58 | + private: | |
| 59 | + ClosedFileInputSource(ClosedFileInputSource const&); | |
| 60 | + ClosedFileInputSource& operator=(ClosedFileInputSource const&); | |
| 61 | + | |
| 62 | + void before(); | |
| 63 | + void after(); | |
| 64 | + | |
| 65 | + class Members | |
| 66 | + { | |
| 67 | + friend class ClosedFileInputSource; | |
| 68 | + | |
| 69 | + public: | |
| 70 | + QPDF_DLL | |
| 71 | + ~Members(); | |
| 72 | + | |
| 73 | + private: | |
| 74 | + Members(char const* filename); | |
| 75 | + | |
| 76 | + std::string filename; | |
| 77 | + qpdf_offset_t offset; | |
| 78 | + FileInputSource* fis; | |
| 79 | + }; | |
| 80 | + PointerHolder<Members> m; | |
| 81 | +}; | |
| 82 | + | |
| 83 | +#endif // __QPDF_CLOSEDFILEINPUTSOURCE_HH__ | ... | ... |
libqpdf/ClosedFileInputSource.cc
0 → 100644
| 1 | +#include <qpdf/ClosedFileInputSource.hh> | |
| 2 | +#include <qpdf/FileInputSource.hh> | |
| 3 | + | |
| 4 | +ClosedFileInputSource::Members::Members(char const* filename) : | |
| 5 | + filename(filename), | |
| 6 | + offset(0), | |
| 7 | + fis(0) | |
| 8 | +{ | |
| 9 | +} | |
| 10 | + | |
| 11 | +ClosedFileInputSource::Members::~Members() | |
| 12 | +{ | |
| 13 | + if (fis) | |
| 14 | + { | |
| 15 | + delete fis; | |
| 16 | + } | |
| 17 | +} | |
| 18 | + | |
| 19 | +ClosedFileInputSource::ClosedFileInputSource(char const* filename) : | |
| 20 | + m(new Members(filename)) | |
| 21 | +{ | |
| 22 | +} | |
| 23 | + | |
| 24 | +ClosedFileInputSource::~ClosedFileInputSource() | |
| 25 | +{ | |
| 26 | +} | |
| 27 | + | |
| 28 | +void | |
| 29 | +ClosedFileInputSource::before() | |
| 30 | +{ | |
| 31 | + if (0 == this->m->fis) | |
| 32 | + { | |
| 33 | + this->m->fis = new FileInputSource(); | |
| 34 | + this->m->fis->setFilename(this->m->filename.c_str()); | |
| 35 | + this->m->fis->seek(this->m->offset, SEEK_SET); | |
| 36 | + this->m->fis->setLastOffset(this->last_offset); | |
| 37 | + } | |
| 38 | +} | |
| 39 | + | |
| 40 | +void | |
| 41 | +ClosedFileInputSource::after() | |
| 42 | +{ | |
| 43 | + this->last_offset = this->m->fis->getLastOffset(); | |
| 44 | + this->m->offset = this->m->fis->tell(); | |
| 45 | + delete this->m->fis; | |
| 46 | + this->m->fis = 0; | |
| 47 | +} | |
| 48 | + | |
| 49 | +qpdf_offset_t | |
| 50 | +ClosedFileInputSource::findAndSkipNextEOL() | |
| 51 | +{ | |
| 52 | + before(); | |
| 53 | + qpdf_offset_t r = this->m->fis->findAndSkipNextEOL(); | |
| 54 | + after(); | |
| 55 | + return r; | |
| 56 | +} | |
| 57 | + | |
| 58 | +std::string const& | |
| 59 | +ClosedFileInputSource::getName() const | |
| 60 | +{ | |
| 61 | + return this->m->filename; | |
| 62 | +} | |
| 63 | + | |
| 64 | +qpdf_offset_t | |
| 65 | +ClosedFileInputSource::tell() | |
| 66 | +{ | |
| 67 | + before(); | |
| 68 | + qpdf_offset_t r = this->m->fis->tell(); | |
| 69 | + after(); | |
| 70 | + return r; | |
| 71 | +} | |
| 72 | + | |
| 73 | +void | |
| 74 | +ClosedFileInputSource::seek(qpdf_offset_t offset, int whence) | |
| 75 | +{ | |
| 76 | + before(); | |
| 77 | + this->m->fis->seek(offset, whence); | |
| 78 | + after(); | |
| 79 | +} | |
| 80 | + | |
| 81 | +void | |
| 82 | +ClosedFileInputSource::rewind() | |
| 83 | +{ | |
| 84 | + this->m->offset = 0; | |
| 85 | +} | |
| 86 | + | |
| 87 | +size_t | |
| 88 | +ClosedFileInputSource::read(char* buffer, size_t length) | |
| 89 | +{ | |
| 90 | + before(); | |
| 91 | + size_t r = this->m->fis->read(buffer, length); | |
| 92 | + after(); | |
| 93 | + return r; | |
| 94 | +} | |
| 95 | + | |
| 96 | +void | |
| 97 | +ClosedFileInputSource::unreadCh(char ch) | |
| 98 | +{ | |
| 99 | + before(); | |
| 100 | + this->m->fis->unreadCh(ch); | |
| 101 | + // Don't call after -- the file has to stay open after this | |
| 102 | + // operation. | |
| 103 | +} | ... | ... |
libqpdf/build.mk
libtests/build.mk
libtests/closed_file_input_source.cc
0 → 100644
| 1 | +#include <qpdf/ClosedFileInputSource.hh> | |
| 2 | +#include <qpdf/FileInputSource.hh> | |
| 3 | + | |
| 4 | +#include <stdio.h> | |
| 5 | +#include <string.h> | |
| 6 | +#include <iostream> | |
| 7 | +#include <stdlib.h> | |
| 8 | + | |
| 9 | +void check(std::string const& what, bool result) | |
| 10 | +{ | |
| 11 | + if (! result) | |
| 12 | + { | |
| 13 | + std::cout << "FAIL: " << what << std::endl; | |
| 14 | + } | |
| 15 | +} | |
| 16 | + | |
| 17 | +void do_tests(InputSource* is) | |
| 18 | +{ | |
| 19 | + check("get name", "input" == is->getName()); | |
| 20 | + check("initial tell", 0 == is->tell()); | |
| 21 | + is->seek(11, SEEK_SET); | |
| 22 | + check("tell after SEEK_SET", 11 == is->tell()); | |
| 23 | + check("read offset 11", "Offset 11" == is->readLine(100)); | |
| 24 | + check("last offset after read 11", 11 == is->getLastOffset()); | |
| 25 | + check("tell after read", 21 == is->tell()); | |
| 26 | + is->findAndSkipNextEOL(); | |
| 27 | + check("tell after findAndSkipNextEOL", 522 == is->tell()); | |
| 28 | + is->unreadCh('Q'); | |
| 29 | + char b[1]; | |
| 30 | + b[0] = '\0'; | |
| 31 | + check("read unread character", 1 == is->read(b, 1)); | |
| 32 | + check("last offset after read unread", 521 == is->getLastOffset()); | |
| 33 | + check("got character", 'Q' == b[0]); | |
| 34 | + is->seek(0, SEEK_END); | |
| 35 | + check("tell at end", 556 == is->tell()); | |
| 36 | + is->seek(-25, SEEK_END); | |
| 37 | + check("tell before end", 531 == is->tell()); | |
| 38 | + check("last offset unchanged after seek", 521 == is->getLastOffset()); | |
| 39 | + is->seek(-9, SEEK_CUR); | |
| 40 | + check("tell after SEEK_CUR", 522 == is->tell()); | |
| 41 | + check("read offset 522", "9 before" == is->readLine(100)); | |
| 42 | + check("last offset after read", 522 == is->getLastOffset()); | |
| 43 | + is->rewind(); | |
| 44 | + check("last offset unchanged after rewind", 522 == is->getLastOffset()); | |
| 45 | + check("tell after rewind", 0 == is->tell()); | |
| 46 | + check("read offset at beginning", "!00000000?" == is->readLine(100)); | |
| 47 | + check("last offset after read 0", 0 == is->getLastOffset()); | |
| 48 | +} | |
| 49 | + | |
| 50 | +int main() | |
| 51 | +{ | |
| 52 | + // This test is designed to work with a specified input file. | |
| 53 | + std::cout << "testing with ClosedFileInputSource\n"; | |
| 54 | + ClosedFileInputSource cf("input"); | |
| 55 | + do_tests(&cf); | |
| 56 | + std::cout << "testing with FileInputSource\n"; | |
| 57 | + FileInputSource f; | |
| 58 | + f.setFilename("input"); | |
| 59 | + do_tests(&f); | |
| 60 | + std::cout << "all assertions passed" << std::endl; | |
| 61 | + return 0; | |
| 62 | +} | ... | ... |
libtests/qtest/closedfile.test
0 → 100644
#!/usr/bin/env perl
require 5.008;
use warnings;
use strict;

# Run from the directory that holds this test's data files.
chdir("closedfile") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('closed_file_input_source');

# Run the test program and compare what it prints with the contents of
# the "output" file, requiring an exit status of 0.
$td->runtest("closed file input source",
             {$td->COMMAND => "closed_file_input_source"},
             {$td->FILE => "output", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# NOTE(review): the argument is presumably the number of tests expected
# to have run -- confirm against the TestDriver documentation.
$td->report(1);
libtests/qtest/closedfile/input
0 → 100644
| 1 | +!00000000? | |
| 2 | +Offset 11 | |
| 3 | +wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww | |
| 4 | +9 before | |
| 5 | +This is 25 from the end. | ... | ... |
libtests/qtest/closedfile/output
0 → 100644