Commit 4ccc8b1a44109a913c87f31029c9c17db30ecc43

Authored by Jay Berkenbilt
1 parent 32ddcec9

Add ClosedFileInputSource

ClosedFileInputSource is an input source that keeps the file closed
when not reading it.
ChangeLog
1 2018-06-22 Jay Berkenbilt <ejb@ql.org> 1 2018-06-22 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Add ClosedFileInputSource class, an input source that keeps its
  4 + input file closed when not reading it. At the expense of some
  5 + performance, this allows you to operate on many files without
  6 + opening too many files at the operating system level.
  7 +
3 * Add new option --preserved-unreferenced-resources, which 8 * Add new option --preserved-unreferenced-resources, which
4 suppresses removal of unreferenced objects from page resource 9 suppresses removal of unreferenced objects from page resource
5 dictionaries during page splitting operations. 10 dictionaries during page splitting operations.
include/qpdf/ClosedFileInputSource.hh 0 → 100644
  1 +// Copyright (c) 2005-2018 Jay Berkenbilt
  2 +//
  3 +// This file is part of qpdf.
  4 +//
  5 +// Licensed under the Apache License, Version 2.0 (the "License");
  6 +// you may not use this file except in compliance with the License.
  7 +// You may obtain a copy of the License at
  8 +//
  9 +// http://www.apache.org/licenses/LICENSE-2.0
  10 +//
  11 +// Unless required by applicable law or agreed to in writing, software
  12 +// distributed under the License is distributed on an "AS IS" BASIS,
  13 +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 +// See the License for the specific language governing permissions and
  15 +// limitations under the License.
  16 +//
  17 +// Versions of qpdf prior to version 7 were released under the terms
  18 +// of version 2.0 of the Artistic License. At your option, you may
  19 +// continue to consider qpdf to be licensed under those terms. Please
  20 +// see the manual for additional information.
  21 +
  22 +#ifndef __QPDF_CLOSEDFILEINPUTSOURCE_HH__
  23 +#define __QPDF_CLOSEDFILEINPUTSOURCE_HH__
  24 +
  25 +// This is an input source that reads from files, like
  26 +// FileInputSource, except that it opens and closes the file
  27 +// surrounding every operation. This decreases efficiency, but it allows
  28 +// many more of these to exist at once than the maximum number of open
  29 +// file descriptors. This is used for merging large numbers of files.
  30 +
  31 +#include <qpdf/InputSource.hh>
  32 +#include <qpdf/PointerHolder.hh>
  33 +
  34 +class FileInputSource;
  35 +
  36 +class ClosedFileInputSource: public InputSource
  37 +{
  38 + public:
  39 + QPDF_DLL
  40 + ClosedFileInputSource(char const* filename);
  41 + QPDF_DLL
  42 + virtual ~ClosedFileInputSource();
  43 + QPDF_DLL
  44 + virtual qpdf_offset_t findAndSkipNextEOL();
  45 + QPDF_DLL
  46 + virtual std::string const& getName() const;
  47 + QPDF_DLL
  48 + virtual qpdf_offset_t tell();
  49 + QPDF_DLL
  50 + virtual void seek(qpdf_offset_t offset, int whence);
  51 + QPDF_DLL
  52 + virtual void rewind();
  53 + QPDF_DLL
  54 + virtual size_t read(char* buffer, size_t length);
  55 + QPDF_DLL
  56 + virtual void unreadCh(char ch);
  57 +
  58 + private:
  59 + ClosedFileInputSource(ClosedFileInputSource const&);
  60 + ClosedFileInputSource& operator=(ClosedFileInputSource const&);
  61 +
  62 + void before();
  63 + void after();
  64 +
  65 + class Members
  66 + {
  67 + friend class ClosedFileInputSource;
  68 +
  69 + public:
  70 + QPDF_DLL
  71 + ~Members();
  72 +
  73 + private:
  74 + Members(char const* filename);
  75 +
  76 + std::string filename;
  77 + qpdf_offset_t offset;
  78 + FileInputSource* fis;
  79 + };
  80 + PointerHolder<Members> m;
  81 +};
  82 +
  83 +#endif // __QPDF_CLOSEDFILEINPUTSOURCE_HH__
libqpdf/ClosedFileInputSource.cc 0 → 100644
  1 +#include <qpdf/ClosedFileInputSource.hh>
  2 +#include <qpdf/FileInputSource.hh>
  3 +
  4 +ClosedFileInputSource::Members::Members(char const* filename) :
  5 + filename(filename),
  6 + offset(0),
  7 + fis(0)
  8 +{
  9 +}
  10 +
  11 +ClosedFileInputSource::Members::~Members()
  12 +{
  13 + if (fis)
  14 + {
  15 + delete fis;
  16 + }
  17 +}
  18 +
  19 +ClosedFileInputSource::ClosedFileInputSource(char const* filename) :
  20 + m(new Members(filename))
  21 +{
  22 +}
  23 +
  24 +ClosedFileInputSource::~ClosedFileInputSource()
  25 +{
  26 +}
  27 +
  28 +void
  29 +ClosedFileInputSource::before()
  30 +{
  31 + if (0 == this->m->fis)
  32 + {
  33 + this->m->fis = new FileInputSource();
  34 + this->m->fis->setFilename(this->m->filename.c_str());
  35 + this->m->fis->seek(this->m->offset, SEEK_SET);
  36 + this->m->fis->setLastOffset(this->last_offset);
  37 + }
  38 +}
  39 +
  40 +void
  41 +ClosedFileInputSource::after()
  42 +{
  43 + this->last_offset = this->m->fis->getLastOffset();
  44 + this->m->offset = this->m->fis->tell();
  45 + delete this->m->fis;
  46 + this->m->fis = 0;
  47 +}
  48 +
  49 +qpdf_offset_t
  50 +ClosedFileInputSource::findAndSkipNextEOL()
  51 +{
  52 + before();
  53 + qpdf_offset_t r = this->m->fis->findAndSkipNextEOL();
  54 + after();
  55 + return r;
  56 +}
  57 +
  58 +std::string const&
  59 +ClosedFileInputSource::getName() const
  60 +{
  61 + return this->m->filename;
  62 +}
  63 +
  64 +qpdf_offset_t
  65 +ClosedFileInputSource::tell()
  66 +{
  67 + before();
  68 + qpdf_offset_t r = this->m->fis->tell();
  69 + after();
  70 + return r;
  71 +}
  72 +
  73 +void
  74 +ClosedFileInputSource::seek(qpdf_offset_t offset, int whence)
  75 +{
  76 + before();
  77 + this->m->fis->seek(offset, whence);
  78 + after();
  79 +}
  80 +
  81 +void
  82 +ClosedFileInputSource::rewind()
  83 +{
  84 + this->m->offset = 0;
  85 +}
  86 +
  87 +size_t
  88 +ClosedFileInputSource::read(char* buffer, size_t length)
  89 +{
  90 + before();
  91 + size_t r = this->m->fis->read(buffer, length);
  92 + after();
  93 + return r;
  94 +}
  95 +
  96 +void
  97 +ClosedFileInputSource::unreadCh(char ch)
  98 +{
  99 + before();
  100 + this->m->fis->unreadCh(ch);
  101 + // Don't call after -- the file has to stay open after this
  102 + // operation.
  103 +}
libqpdf/build.mk
@@ -9,6 +9,7 @@ SRCS_libqpdf = \ @@ -9,6 +9,7 @@ SRCS_libqpdf = \
9 libqpdf/BitWriter.cc \ 9 libqpdf/BitWriter.cc \
10 libqpdf/Buffer.cc \ 10 libqpdf/Buffer.cc \
11 libqpdf/BufferInputSource.cc \ 11 libqpdf/BufferInputSource.cc \
  12 + libqpdf/ClosedFileInputSource.cc \
12 libqpdf/ContentNormalizer.cc \ 13 libqpdf/ContentNormalizer.cc \
13 libqpdf/FileInputSource.cc \ 14 libqpdf/FileInputSource.cc \
14 libqpdf/InputSource.cc \ 15 libqpdf/InputSource.cc \
libtests/build.mk
@@ -3,6 +3,7 @@ BINS_libtests = \ @@ -3,6 +3,7 @@ BINS_libtests = \
3 ascii85 \ 3 ascii85 \
4 bits \ 4 bits \
5 buffer \ 5 buffer \
  6 + closed_file_input_source \
6 concatenate \ 7 concatenate \
7 dct_compress \ 8 dct_compress \
8 dct_uncompress \ 9 dct_uncompress \
libtests/closed_file_input_source.cc 0 → 100644
  1 +#include <qpdf/ClosedFileInputSource.hh>
  2 +#include <qpdf/FileInputSource.hh>
  3 +
  4 +#include <stdio.h>
  5 +#include <string.h>
  6 +#include <iostream>
  7 +#include <stdlib.h>
  8 +
  9 +void check(std::string const& what, bool result)
  10 +{
  11 + if (! result)
  12 + {
  13 + std::cout << "FAIL: " << what << std::endl;
  14 + }
  15 +}
  16 +
  17 +void do_tests(InputSource* is)
  18 +{
  19 + check("get name", "input" == is->getName());
  20 + check("initial tell", 0 == is->tell());
  21 + is->seek(11, SEEK_SET);
  22 + check("tell after SEEK_SET", 11 == is->tell());
  23 + check("read offset 11", "Offset 11" == is->readLine(100));
  24 + check("last offset after read 11", 11 == is->getLastOffset());
  25 + check("tell after read", 21 == is->tell());
  26 + is->findAndSkipNextEOL();
  27 + check("tell after findAndSkipNextEOL", 522 == is->tell());
  28 + is->unreadCh('Q');
  29 + char b[1];
  30 + b[0] = '\0';
  31 + check("read unread character", 1 == is->read(b, 1));
  32 + check("last offset after read unread", 521 == is->getLastOffset());
  33 + check("got character", 'Q' == b[0]);
  34 + is->seek(0, SEEK_END);
  35 + check("tell at end", 556 == is->tell());
  36 + is->seek(-25, SEEK_END);
  37 + check("tell before end", 531 == is->tell());
  38 + check("last offset unchanged after seek", 521 == is->getLastOffset());
  39 + is->seek(-9, SEEK_CUR);
  40 + check("tell after SEEK_CUR", 522 == is->tell());
  41 + check("read offset 522", "9 before" == is->readLine(100));
  42 + check("last offset after read", 522 == is->getLastOffset());
  43 + is->rewind();
  44 + check("last offset unchanged after rewind", 522 == is->getLastOffset());
  45 + check("tell after rewind", 0 == is->tell());
  46 + check("read offset at beginning", "!00000000?" == is->readLine(100));
  47 + check("last offset after read 0", 0 == is->getLastOffset());
  48 +}
  49 +
  50 +int main()
  51 +{
  52 + // This test is designed to work with a specified input file.
  53 + std::cout << "testing with ClosedFileInputSource\n";
  54 + ClosedFileInputSource cf("input");
  55 + do_tests(&cf);
  56 + std::cout << "testing with FileInputSource\n";
  57 + FileInputSource f;
  58 + f.setFilename("input");
  59 + do_tests(&f);
  60 + std::cout << "all assertions passed" << std::endl;
  61 + return 0;
  62 +}
libtests/qtest/closedfile.test 0 → 100644
  1 +#!/usr/bin/env perl
  2 +require 5.008;
  3 +use warnings;
  4 +use strict;
  5 +
  6 +chdir("closedfile") or die "chdir testdir failed: $!\n";
  7 +
  8 +require TestDriver;
  9 +
  10 +my $td = new TestDriver('closed_file_input_source');
  11 +
  12 +$td->runtest("closed file input source",
  13 + {$td->COMMAND => "closed_file_input_source"},
  14 + {$td->FILE => "output", $td->EXIT_STATUS => 0},
  15 + $td->NORMALIZE_NEWLINES);
  16 +$td->report(1);
libtests/qtest/closedfile/input 0 → 100644
  1 +!00000000?
  2 +Offset 11
  3 +wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
  4 +9 before
  5 +This is 25 from the end.
libtests/qtest/closedfile/output 0 → 100644
  1 +testing with ClosedFileInputSource
  2 +testing with FileInputSource
  3 +all assertions passed