Commit 8318d81ada86d4ec8e343c47103932b6bbe45a42

Authored by Jay Berkenbilt
1 parent 781c3130

Fix and test support for files >= 4 GB

Makefile
... ... @@ -82,6 +82,7 @@ CLEAN_TARGETS = $(foreach B,$(BUILD_ITEMS),clean_$(B))
82 82 # For test suitse
83 83 export QPDF_BIN = $(abspath qpdf/$(OUTPUT_DIR)/qpdf)
84 84 export SKIP_TEST_COMPARE_IMAGES
  85 +export LARGE_FILE_TEST_PATH
85 86  
86 87 clean:: $(CLEAN_TARGETS)
87 88  
... ...
... ... @@ -15,32 +15,14 @@ Next
15 15  
16 16 * Testing for files > 4GB
17 17  
18   - - Create a PDF from scratch. Each page has a page number as text
19   - and an image. The image can be 5000x5000 pixels using 8-bit
20   - gray scale. It will be divided into 10 stripes of 500 pixels
21   - each. The left and right 500 pixels of each stripe will
22   - alternate black and white. The remaining part of the image will
23   - have white stripes indicating 1 and black stripes indicating 0
24   - with the most-significant bit on top to indicate the page
25   - number. In this way, every page will be unique and will consume
26   - approximately 25 megabytes. Creating 200 pages like this will
27   - make a file that is 5 GB.
28   -
29   - - The file will have to have object streams since a regular xref
30   - table won't be able to support offsets that large.
31   -
32   - - A separate test program can create this file and do various
33   - manipulations on it. This can be enabled with an environment
34   - variable controlled by configure in much the same way image
35   - comparison tests are enabled now. The argument to
36   - --enable-large-file-test should be a path that has enough disk
37   - space to do the tests, probably enough space for two coipes of
38   - the file. The test program should also have an interactive mode
39   - so we can generate the large file and then look at it with a
40   - PDF viewer like Adobe Reader. The test suite should actually
41   - read the file back in and look at all the page and stream
42   - contents to make sure the file is really correct. We need to
43   - test normal writing and linearization.
  18 + The large file test can be enabled with an environment variable
  19 + controlled by configure in much the same way image comparison tests
  20 + are enabled now. The argument to --with-large-file-test-path should be
  21 + a path that has enough disk space to do the tests, probably enough
  22 + space for two copies of the file.
  23 +
  24 + The tests will take a very long time (possibly hours) to run, so we
  25 + will run them infrequently.
44 26  
45 27  
46 28 Soon
... ...
autoconf.mk.in
... ... @@ -36,3 +36,5 @@ SKIP_TEST_COMPARE_IMAGES=@SKIP_TEST_COMPARE_IMAGES@
36 36 BUILDRULES=@BUILDRULES@
37 37 HAVE_LD_VERSION_SCRIPT=@HAVE_LD_VERSION_SCRIPT@
38 38 WINDOWS_WORDSIZE=@WINDOWS_WORDSIZE@
  39 +SHOW_FAILED_TEST_OUTPUT=@SHOW_FAILED_TEST_OUTPUT@
  40 +LARGE_FILE_TEST_PATH=@LARGE_FILE_TEST_PATH@
... ...
configure.ac
... ... @@ -54,6 +54,14 @@ if test "$BUILD_INTERNAL_LIBS" = "0"; then
54 54 AC_SEARCH_LIBS(pcre_compile,pcre,,[MISSING_PCRE=1; MISSING_ANY=1])
55 55 fi
56 56  
  57 +LARGE_FILE_TEST_PATH=
  58 +AC_SUBST(LARGE_FILE_TEST_PATH)
  59 +AC_ARG_WITH(large-file-test-path,
  60 + AS_HELP_STRING([--with-large-file-test-path=path],
  61 + [To enable testing of files > 4GB, give the path to a directory with at least 11 GB free. The test suite will write temporary files to this directory. Alternatively, just set the LARGE_FILE_TEST_PATH environment variable to the path before running the test suite.]),
  62 + [LARGE_FILE_TEST_PATH=$withval],
  63 + [LARGE_FILE_TEST_PATH=])
  64 +
57 65 AC_SYS_LARGEFILE
58 66 AC_FUNC_FSEEKO
59 67 AC_TYPE_UINT16_T
... ...
include/qpdf/QPDF.hh
... ... @@ -337,7 +337,7 @@ class QPDF
337 337  
338 338 QPDF_DLL
339 339 void generateHintStream(std::map<int, QPDFXRefEntry> const& xref,
340   - std::map<int, size_t> const& lengths,
  340 + std::map<int, qpdf_offset_t> const& lengths,
341 341 std::map<int, int> const& obj_renumber,
342 342 PointerHolder<Buffer>& hint_stream,
343 343 int& S, int& O);
... ... @@ -531,8 +531,9 @@ class QPDF
531 531 void reconstruct_xref(QPDFExc& e);
532 532 qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
533 533 qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
534   - int processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
535   - void insertXrefEntry(int obj, int f0, int f1, int f2,
  534 + qpdf_offset_t processXRefStream(
  535 + qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
  536 + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2,
536 537 bool overwrite = false);
537 538 void setLastObjectDescription(std::string const& description,
538 539 int objid, int generation);
... ... @@ -609,13 +610,13 @@ class QPDF
609 610 }
610 611  
611 612 int delta_nobjects; // 1
612   - int delta_page_length; // 2
  613 + qpdf_offset_t delta_page_length; // 2
613 614 int nshared_objects; // 3
614 615 // vectors' sizes = nshared_objects
615 616 std::vector<int> shared_identifiers; // 4
616 617 std::vector<int> shared_numerators; // 5
617   - int delta_content_offset; // 6
618   - int delta_content_length; // 7
  618 + qpdf_offset_t delta_content_offset; // 6
  619 + qpdf_offset_t delta_content_length; // 7
619 620 };
620 621  
621 622 // PDF 1.4: Table F.3
... ... @@ -639,7 +640,7 @@ class QPDF
639 640 }
640 641  
641 642 int min_nobjects; // 1
642   - int first_page_offset; // 2
  643 + qpdf_offset_t first_page_offset; // 2
643 644 int nbits_delta_nobjects; // 3
644 645 int min_page_length; // 4
645 646 int nbits_delta_page_length; // 5
... ... @@ -686,7 +687,7 @@ class QPDF
686 687 }
687 688  
688 689 int first_shared_obj; // 1
689   - int first_shared_offset; // 2
  690 + qpdf_offset_t first_shared_offset; // 2
690 691 int nshared_first_page; // 3
691 692 int nshared_total; // 4
692 693 int nbits_nobjects; // 5
... ... @@ -708,7 +709,7 @@ class QPDF
708 709 }
709 710  
710 711 int first_object; // 1
711   - int first_object_offset; // 2
  712 + qpdf_offset_t first_object_offset; // 2
712 713 int nobjects; // 3
713 714 int group_length; // 4
714 715 };
... ... @@ -730,14 +731,14 @@ class QPDF
730 731 {
731 732 }
732 733  
733   - int file_size; // /L
734   - int first_page_object; // /O
735   - int first_page_end; // /E
736   - int npages; // /N
737   - int xref_zero_offset; // /T
738   - int first_page; // /P
739   - int H_offset; // offset of primary hint stream
740   - int H_length; // length of primary hint stream
  734 + qpdf_offset_t file_size; // /L
  735 + int first_page_object; // /O
  736 + qpdf_offset_t first_page_end; // /E
  737 + int npages; // /N
  738 + qpdf_offset_t xref_zero_offset; // /T
  739 + int first_page; // /P
  740 + qpdf_offset_t H_offset; // offset of primary hint stream
  741 + qpdf_offset_t H_length; // length of primary hint stream
741 742 };
742 743  
743 744 // Computed hint table value data structures. These tables
... ... @@ -851,7 +852,7 @@ class QPDF
851 852 void readHSharedObject(BitStream);
852 853 void readHGeneric(BitStream, HGeneric&);
853 854 int maxEnd(ObjUser const& ou);
854   - int getLinearizationOffset(ObjGen const&);
  855 + qpdf_offset_t getLinearizationOffset(ObjGen const&);
855 856 QPDFObjectHandle getUncompressedObject(
856 857 QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
857 858 int lengthNextN(int first_object, int n,
... ... @@ -878,19 +879,19 @@ class QPDF
878 879 std::map<int, int> const& object_stream_data);
879 880 int outputLengthNextN(
880 881 int in_object, int n,
881   - std::map<int, size_t> const& lengths,
  882 + std::map<int, qpdf_offset_t> const& lengths,
882 883 std::map<int, int> const& obj_renumber);
883 884 void calculateHPageOffset(
884 885 std::map<int, QPDFXRefEntry> const& xref,
885   - std::map<int, size_t> const& lengths,
  886 + std::map<int, qpdf_offset_t> const& lengths,
886 887 std::map<int, int> const& obj_renumber);
887 888 void calculateHSharedObject(
888 889 std::map<int, QPDFXRefEntry> const& xref,
889   - std::map<int, size_t> const& lengths,
  890 + std::map<int, qpdf_offset_t> const& lengths,
890 891 std::map<int, int> const& obj_renumber);
891 892 void calculateHOutline(
892 893 std::map<int, QPDFXRefEntry> const& xref,
893   - std::map<int, size_t> const& lengths,
  894 + std::map<int, qpdf_offset_t> const& lengths,
894 895 std::map<int, int> const& obj_renumber);
895 896 void writeHPageOffset(BitWriter&);
896 897 void writeHSharedObject(BitWriter&);
... ... @@ -942,7 +943,7 @@ class QPDF
942 943 std::vector<QPDFExc> warnings;
943 944  
944 945 // Linearization data
945   - int first_xref_item_offset; // actual value from file
  946 + qpdf_offset_t first_xref_item_offset; // actual value from file
946 947 bool uncompressed_after_compressed;
947 948  
948 949 // Linearization parameter dictionary and hint table data: may be
... ...
include/qpdf/QPDFWriter.hh
... ... @@ -212,8 +212,8 @@ class QPDFWriter
212 212 enum trailer_e { t_normal, t_lin_first, t_lin_second };
213 213  
214 214 void init();
215   - int bytesNeeded(unsigned long n);
216   - void writeBinary(unsigned long val, unsigned int bytes);
  215 + int bytesNeeded(unsigned long long n);
  216 + void writeBinary(unsigned long long val, unsigned int bytes);
217 217 void writeString(std::string const& str);
218 218 void writeBuffer(PointerHolder<Buffer>&);
219 219 void writeStringQDF(std::string const& str);
... ... @@ -226,7 +226,7 @@ class QPDFWriter
226 226 void writeObjectStream(QPDFObjectHandle object);
227 227 void writeObject(QPDFObjectHandle object, int object_stream_index = -1);
228 228 void writeTrailer(trailer_e which, int size,
229   - bool xref_stream, int prev = 0);
  229 + bool xref_stream, qpdf_offset_t prev = 0);
230 230 void unparseObject(QPDFObjectHandle object, int level,
231 231 unsigned int flags);
232 232 void unparseObject(QPDFObjectHandle object, int level,
... ... @@ -263,24 +263,28 @@ class QPDFWriter
263 263 void writeEncryptionDictionary();
264 264 void writeHeader();
265 265 void writeHintStream(int hint_id);
266   - int writeXRefTable(trailer_e which, int first, int last, int size);
267   - int writeXRefTable(trailer_e which, int first, int last, int size,
268   - // for linearization
269   - int prev,
270   - bool suppress_offsets,
271   - int hint_id,
272   - qpdf_offset_t hint_offset,
273   - qpdf_offset_t hint_length);
274   - int writeXRefStream(int objid, int max_id, int max_offset,
275   - trailer_e which, int first, int last, int size);
276   - int writeXRefStream(int objid, int max_id, int max_offset,
277   - trailer_e which, int first, int last, int size,
278   - // for linearization
279   - int prev,
280   - int hint_id,
281   - qpdf_offset_t hint_offset,
282   - qpdf_offset_t hint_length,
283   - bool skip_compression);
  266 + qpdf_offset_t writeXRefTable(
  267 + trailer_e which, int first, int last, int size);
  268 + qpdf_offset_t writeXRefTable(
  269 + trailer_e which, int first, int last, int size,
  270 + // for linearization
  271 + qpdf_offset_t prev,
  272 + bool suppress_offsets,
  273 + int hint_id,
  274 + qpdf_offset_t hint_offset,
  275 + qpdf_offset_t hint_length);
  276 + qpdf_offset_t writeXRefStream(
  277 + int objid, int max_id, qpdf_offset_t max_offset,
  278 + trailer_e which, int first, int last, int size);
  279 + qpdf_offset_t writeXRefStream(
  280 + int objid, int max_id, qpdf_offset_t max_offset,
  281 + trailer_e which, int first, int last, int size,
  282 + // for linearization
  283 + qpdf_offset_t prev,
  284 + int hint_id,
  285 + qpdf_offset_t hint_offset,
  286 + qpdf_offset_t hint_length,
  287 + bool skip_compression);
284 288 int calculateXrefStreamPadding(int xref_bytes);
285 289  
286 290 // When filtering subsections, push additional pipelines to the
... ... @@ -336,7 +340,7 @@ class QPDFWriter
336 340 std::list<QPDFObjectHandle> object_queue;
337 341 std::map<int, int> obj_renumber;
338 342 std::map<int, QPDFXRefEntry> xref;
339   - std::map<int, size_t> lengths;
  343 + std::map<int, qpdf_offset_t> lengths;
340 344 int next_objid;
341 345 int cur_stream_length_id;
342 346 size_t cur_stream_length;
... ...
include/qpdf/QPDFXRefEntry.hh
... ... @@ -28,9 +28,9 @@ class QPDFXRefEntry
28 28 QPDF_DLL
29 29 int getType() const;
30 30 QPDF_DLL
31   - qpdf_offset_t getOffset() const; // only for type 1
  31 + qpdf_offset_t getOffset() const; // only for type 1
32 32 QPDF_DLL
33   - int getObjStreamNumber() const; // only for type 2
  33 + int getObjStreamNumber() const; // only for type 2
34 34 QPDF_DLL
35 35 int getObjStreamIndex() const; // only for type 2
36 36  
... ...
include/qpdf/qpdf-c.h
... ... @@ -154,7 +154,7 @@ extern "C" {
154 154 QPDF_DLL
155 155 char const* qpdf_get_error_filename(qpdf_data q, qpdf_error e);
156 156 QPDF_DLL
157   - unsigned long qpdf_get_error_file_position(qpdf_data q, qpdf_error e);
  157 + unsigned long long qpdf_get_error_file_position(qpdf_data q, qpdf_error e);
158 158 QPDF_DLL
159 159 char const* qpdf_get_error_message_detail(qpdf_data q, qpdf_error e);
160 160  
... ... @@ -195,7 +195,7 @@ extern "C" {
195 195 QPDF_ERROR_CODE qpdf_read_memory(qpdf_data qpdf,
196 196 char const* description,
197 197 char const* buffer,
198   - unsigned long size,
  198 + unsigned long long size,
199 199 char const* password);
200 200  
201 201 /* Read functions below must be called after qpdf_read or
... ...
libqpdf/BitStream.cc
... ... @@ -19,7 +19,7 @@ BitStream::reset()
19 19 bits_available = 8 * nbytes;
20 20 }
21 21  
22   -unsigned long
  22 +unsigned long long
23 23 BitStream::getBits(int nbits)
24 24 {
25 25 return read_bits(this->p, this->bit_offset,
... ...
libqpdf/BitWriter.cc
... ... @@ -12,7 +12,7 @@ BitWriter::BitWriter(Pipeline* pl) :
12 12 }
13 13  
14 14 void
15   -BitWriter::writeBits(unsigned long val, unsigned int bits)
  15 +BitWriter::writeBits(unsigned long long val, unsigned int bits)
16 16 {
17 17 write_bits(this->ch, this->bit_offset, val, bits, this->pl);
18 18 }
... ...
libqpdf/QPDF.cc
... ... @@ -571,7 +571,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
571 571 in_obj = true;
572 572 int obj = atoi(m.getMatch(1).c_str());
573 573 int gen = atoi(m.getMatch(2).c_str());
574   - int offset = this->file->getLastOffset();
  574 + qpdf_offset_t offset = this->file->getLastOffset();
575 575 insertXrefEntry(obj, 1, offset, gen, true);
576 576 }
577 577 else if ((! this->trailer.isInitialized()) &&
... ... @@ -634,6 +634,11 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
634 634 }
635 635 }
636 636  
  637 + if (! this->trailer.isInitialized())
  638 + {
  639 + throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0,
  640 + "unable to find trailer while reading xref");
  641 + }
637 642 int size = this->trailer.getKey("/Size").getIntValue();
638 643 int max_obj = 0;
639 644 if (! xref_table.empty())
... ... @@ -704,7 +709,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
704 709 QUtil::int_to_string(i) + ")");
705 710 }
706 711  
707   - int f1 = atoi(m2.getMatch(1).c_str());
  712 + // For xref_table, these will always be small enough to be ints
  713 + qpdf_offset_t f1 = QUtil::string_to_ll(m2.getMatch(1).c_str());
708 714 int f2 = atoi(m2.getMatch(2).c_str());
709 715 char type = m2.getMatch(3)[0];
710 716 if (type == 'f')
... ... @@ -855,7 +861,7 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset)
855 861 return xref_offset;
856 862 }
857 863  
858   -int
  864 +qpdf_offset_t
859 865 QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
860 866 {
861 867 QPDFObjectHandle dict = xref_obj.getDict();
... ... @@ -957,7 +963,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
957 963 {
958 964 // Read this entry
959 965 unsigned char const* entry = data + (entry_size * i);
960   - int fields[3];
  966 + qpdf_offset_t fields[3];
961 967 unsigned char const* p = entry;
962 968 for (int j = 0; j < 3; ++j)
963 969 {
... ... @@ -1002,7 +1008,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
1002 1008 // This is needed by checkLinearization()
1003 1009 this->first_xref_item_offset = xref_offset;
1004 1010 }
1005   - insertXrefEntry(obj, fields[0], fields[1], fields[2]);
  1011 + insertXrefEntry(obj, (int)fields[0], fields[1], (int)fields[2]);
1006 1012 }
1007 1013  
1008 1014 if (! this->trailer.isInitialized())
... ... @@ -1031,7 +1037,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
1031 1037 }
1032 1038  
1033 1039 void
1034   -QPDF::insertXrefEntry(int obj, int f0, int f1, int f2, bool overwrite)
  1040 +QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite)
1035 1041 {
1036 1042 // Populate the xref table in such a way that the first reference
1037 1043 // to an object that we see, which is the one in the latest xref
... ... @@ -1558,7 +1564,7 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
1558 1564 QPDFXRefEntry const& entry = (*iter).second;
1559 1565 if (entry.getType() == 1)
1560 1566 {
1561   - int obj_offset = entry.getOffset();
  1567 + qpdf_offset_t obj_offset = entry.getOffset();
1562 1568 if ((obj_offset > stream_offset) &&
1563 1569 ((this_obj_offset == 0) ||
1564 1570 (this_obj_offset > obj_offset)))
... ...
libqpdf/QPDFWriter.cc
... ... @@ -540,7 +540,7 @@ QPDFWriter::setDataKey(int objid)
540 540 }
541 541  
542 542 int
543   -QPDFWriter::bytesNeeded(unsigned long n)
  543 +QPDFWriter::bytesNeeded(unsigned long long n)
544 544 {
545 545 int bytes = 0;
546 546 while (n)
... ... @@ -552,10 +552,10 @@ QPDFWriter::bytesNeeded(unsigned long n)
552 552 }
553 553  
554 554 void
555   -QPDFWriter::writeBinary(unsigned long val, unsigned int bytes)
  555 +QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
556 556 {
557   - assert(bytes <= sizeof(unsigned long));
558   - unsigned char data[sizeof(unsigned long)];
  557 + assert(bytes <= sizeof(unsigned long long));
  558 + unsigned char data[sizeof(unsigned long long)];
559 559 for (unsigned int i = 0; i < bytes; ++i)
560 560 {
561 561 data[bytes - i - 1] = (unsigned char)(val & 0xff);
... ... @@ -849,7 +849,8 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
849 849 }
850 850  
851 851 void
852   -QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, int prev)
  852 +QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
  853 + qpdf_offset_t prev)
853 854 {
854 855 QPDFObjectHandle trailer = pdf.getTrailer();
855 856 if (! xref_stream)
... ... @@ -1812,15 +1813,15 @@ QPDFWriter::writeHintStream(int hint_id)
1812 1813 closeObject(hint_id);
1813 1814 }
1814 1815  
1815   -int
  1816 +qpdf_offset_t
1816 1817 QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
1817 1818 {
1818 1819 return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0);
1819 1820 }
1820 1821  
1821   -int
  1822 +qpdf_offset_t
1822 1823 QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
1823   - int prev, bool suppress_offsets,
  1824 + qpdf_offset_t prev, bool suppress_offsets,
1824 1825 int hint_id, qpdf_offset_t hint_offset,
1825 1826 qpdf_offset_t hint_length)
1826 1827 {
... ... @@ -1838,7 +1839,7 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
1838 1839 }
1839 1840 else
1840 1841 {
1841   - int offset = 0;
  1842 + qpdf_offset_t offset = 0;
1842 1843 if (! suppress_offsets)
1843 1844 {
1844 1845 offset = this->xref[i].getOffset();
... ... @@ -1858,24 +1859,24 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
1858 1859 return space_before_zero;
1859 1860 }
1860 1861  
1861   -int
1862   -QPDFWriter::writeXRefStream(int objid, int max_id, int max_offset,
  1862 +qpdf_offset_t
  1863 +QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset,
1863 1864 trailer_e which, int first, int last, int size)
1864 1865 {
1865 1866 return writeXRefStream(objid, max_id, max_offset,
1866 1867 which, first, last, size, 0, 0, 0, 0, false);
1867 1868 }
1868 1869  
1869   -int
1870   -QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset,
  1870 +qpdf_offset_t
  1871 +QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
1871 1872 trailer_e which, int first, int last, int size,
1872   - int prev, int hint_id,
  1873 + qpdf_offset_t prev, int hint_id,
1873 1874 qpdf_offset_t hint_offset,
1874 1875 qpdf_offset_t hint_length,
1875 1876 bool skip_compression)
1876 1877 {
1877 1878 qpdf_offset_t xref_offset = this->pipeline->getCount();
1878   - int space_before_zero = xref_offset - 1;
  1879 + qpdf_offset_t space_before_zero = xref_offset - 1;
1879 1880  
1880 1881 // field 1 contains offsets and object stream identifiers
1881 1882 int f1_size = std::max(bytesNeeded(max_offset),
... ... @@ -1921,7 +1922,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset,
1921 1922  
1922 1923 case 1:
1923 1924 {
1924   - int offset = e.getOffset();
  1925 + qpdf_offset_t offset = e.getOffset();
1925 1926 if ((hint_id != 0) &&
1926 1927 (i != hint_id) &&
1927 1928 (offset >= hint_offset))
... ... @@ -2309,7 +2310,7 @@ QPDFWriter::writeLinearized()
2309 2310  
2310 2311 // Save hint offset since it will be set to zero by
2311 2312 // calling openObject.
2312   - int hint_offset = this->xref[hint_id].getOffset();
  2313 + qpdf_offset_t hint_offset = this->xref[hint_id].getOffset();
2313 2314  
2314 2315 // Write hint stream to a buffer
2315 2316 pushPipeline(new Pl_Buffer("hint buffer"));
... ...
libqpdf/QPDF_linearization.cc
... ... @@ -18,10 +18,10 @@
18 18 #include <math.h>
19 19 #include <string.h>
20 20  
21   -template <class T>
  21 +template <class T, class int_type>
22 22 static void
23 23 load_vector_int(BitStream& bit_stream, int nitems, std::vector<T>& vec,
24   - int bits_wanted, int T::*field)
  24 + int bits_wanted, int_type T::*field)
25 25 {
26 26 // nitems times, read bits_wanted from the given bit stream,
27 27 // storing results in the ith vector entry.
... ... @@ -144,7 +144,7 @@ QPDF::isLinearized()
144 144 QPDFObjectHandle L = candidate.getKey("/L");
145 145 if (L.isInteger())
146 146 {
147   - int Li = L.getIntValue();
  147 + qpdf_offset_t Li = L.getIntValue();
148 148 this->file->seek(0, SEEK_END);
149 149 if (Li != this->file->tell())
150 150 {
... ... @@ -649,11 +649,11 @@ QPDF::maxEnd(ObjUser const& ou)
649 649 return end;
650 650 }
651 651  
652   -int
  652 +qpdf_offset_t
653 653 QPDF::getLinearizationOffset(ObjGen const& og)
654 654 {
655 655 QPDFXRefEntry entry = this->xref_table[og];
656   - int result = 0;
  656 + qpdf_offset_t result = 0;
657 657 switch (entry.getType())
658 658 {
659 659 case 1:
... ... @@ -1787,7 +1787,7 @@ static inline int nbits(int val)
1787 1787 int
1788 1788 QPDF::outputLengthNextN(
1789 1789 int in_object, int n,
1790   - std::map<int, size_t> const& lengths,
  1790 + std::map<int, qpdf_offset_t> const& lengths,
1791 1791 std::map<int, int> const& obj_renumber)
1792 1792 {
1793 1793 // Figure out the length of a series of n consecutive objects in
... ... @@ -1808,7 +1808,7 @@ QPDF::outputLengthNextN(
1808 1808 void
1809 1809 QPDF::calculateHPageOffset(
1810 1810 std::map<int, QPDFXRefEntry> const& xref,
1811   - std::map<int, size_t> const& lengths,
  1811 + std::map<int, qpdf_offset_t> const& lengths,
1812 1812 std::map<int, int> const& obj_renumber)
1813 1813 {
1814 1814 // Page Offset Hint Table
... ... @@ -1900,7 +1900,7 @@ QPDF::calculateHPageOffset(
1900 1900 void
1901 1901 QPDF::calculateHSharedObject(
1902 1902 std::map<int, QPDFXRefEntry> const& xref,
1903   - std::map<int, size_t> const& lengths,
  1903 + std::map<int, qpdf_offset_t> const& lengths,
1904 1904 std::map<int, int> const& obj_renumber)
1905 1905 {
1906 1906 CHSharedObject& cso = this->c_shared_object_data;
... ... @@ -1946,7 +1946,7 @@ QPDF::calculateHSharedObject(
1946 1946 void
1947 1947 QPDF::calculateHOutline(
1948 1948 std::map<int, QPDFXRefEntry> const& xref,
1949   - std::map<int, size_t> const& lengths,
  1949 + std::map<int, qpdf_offset_t> const& lengths,
1950 1950 std::map<int, int> const& obj_renumber)
1951 1951 {
1952 1952 HGeneric& cho = this->c_outline_data;
... ... @@ -1967,10 +1967,10 @@ QPDF::calculateHOutline(
1967 1967 cho.first_object, ho.nobjects, lengths, obj_renumber);
1968 1968 }
1969 1969  
1970   -template <class T>
  1970 +template <class T, class int_type>
1971 1971 static void
1972 1972 write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec,
1973   - int bits, int T::*field)
  1973 + int bits, int_type T::*field)
1974 1974 {
1975 1975 // nitems times, write bits bits from the given field of the ith
1976 1976 // vector to the given bit writer.
... ... @@ -2095,7 +2095,7 @@ QPDF::writeHGeneric(BitWriter& w, HGeneric& t)
2095 2095  
2096 2096 void
2097 2097 QPDF::generateHintStream(std::map<int, QPDFXRefEntry> const& xref,
2098   - std::map<int, size_t> const& lengths,
  2098 + std::map<int, qpdf_offset_t> const& lengths,
2099 2099 std::map<int, int> const& obj_renumber,
2100 2100 PointerHolder<Buffer>& hint_buffer,
2101 2101 int& S, int& O)
... ...
libqpdf/bits.icc
... ... @@ -15,7 +15,7 @@
15 15 // this code includes with the symbol defined.
16 16  
17 17 #ifdef BITS_READ
18   -static unsigned long
  18 +static unsigned long long
19 19 read_bits(unsigned char const*& p, unsigned int& bit_offset,
20 20 unsigned int& bits_available, unsigned int bits_wanted)
21 21 {
... ... @@ -95,7 +95,7 @@ read_bits(unsigned char const*& p, unsigned int& bit_offset,
95 95 #ifdef BITS_WRITE
96 96 static void
97 97 write_bits(unsigned char& ch, unsigned int& bit_offset,
98   - unsigned long val, unsigned int bits, Pipeline* pipeline)
  98 + unsigned long long val, unsigned int bits, Pipeline* pipeline)
99 99 {
100 100 if (bits > 32)
101 101 {
... ...
libqpdf/qpdf-c.cc
... ... @@ -31,7 +31,7 @@ struct _qpdf_data
31 31 // Parameters for functions we call
32 32 char const* filename; // or description
33 33 char const* buffer;
34   - unsigned long size;
  34 + unsigned long long size;
35 35 char const* password;
36 36 bool write_memory;
37 37 Buffer* output_buffer;
... ... @@ -218,7 +218,7 @@ char const* qpdf_get_error_filename(qpdf_data qpdf, qpdf_error e)
218 218 return e->exc->getFilename().c_str();
219 219 }
220 220  
221   -unsigned long qpdf_get_error_file_position(qpdf_data qpdf, qpdf_error e)
  221 +unsigned long long qpdf_get_error_file_position(qpdf_data qpdf, qpdf_error e)
222 222 {
223 223 if (e == 0)
224 224 {
... ... @@ -268,7 +268,7 @@ QPDF_ERROR_CODE qpdf_read(qpdf_data qpdf, char const* filename,
268 268 QPDF_ERROR_CODE qpdf_read_memory(qpdf_data qpdf,
269 269 char const* description,
270 270 char const* buffer,
271   - unsigned long size,
  271 + unsigned long long size,
272 272 char const* password)
273 273 {
274 274 QPDF_ERROR_CODE status = QPDF_SUCCESS;
... ...
libqpdf/qpdf/BitStream.hh
... ... @@ -13,7 +13,7 @@ class BitStream
13 13 QPDF_DLL
14 14 void reset();
15 15 QPDF_DLL
16   - unsigned long getBits(int nbits);
  16 + unsigned long long getBits(int nbits);
17 17 QPDF_DLL
18 18 void skipToNextByte();
19 19  
... ...
libqpdf/qpdf/BitWriter.hh
... ... @@ -15,7 +15,7 @@ class BitWriter
15 15 QPDF_DLL
16 16 BitWriter(Pipeline* pl);
17 17 QPDF_DLL
18   - void writeBits(unsigned long val, unsigned int bits);
  18 + void writeBits(unsigned long long val, unsigned int bits);
19 19 // Force any partial byte to be written to the pipeline.
20 20 QPDF_DLL
21 21 void flush();
... ...
qpdf/build.mk
1   -BINS_qpdf = qpdf test_driver pdf_from_scratch
  1 +BINS_qpdf = qpdf test_driver pdf_from_scratch test_large_file
2 2 CBINS_qpdf = qpdf-ctest
3 3  
4 4 TARGETS_qpdf = $(foreach B,$(BINS_qpdf) $(CBINS_qpdf),qpdf/$(OUTPUT_DIR)/$(call binname,$(B)))
... ...
qpdf/qpdf-ctest.c
... ... @@ -17,7 +17,10 @@ static void report_errors()
17 17 printf("warning: %s\n", qpdf_get_error_full_text(qpdf, e));
18 18 printf(" code: %d\n", qpdf_get_error_code(qpdf, e));
19 19 printf(" file: %s\n", qpdf_get_error_filename(qpdf, e));
20   - printf(" pos : %ld\n", qpdf_get_error_file_position(qpdf, e));
  20 + /* If your compiler doesn't support %lld, change to %ld and
  21 + * lose precision in the error message.
  22 + */
  23 + printf(" pos : %lld\n", qpdf_get_error_file_position(qpdf, e));
21 24 printf(" text: %s\n", qpdf_get_error_message_detail(qpdf, e));
22 25 }
23 26 if (qpdf_has_error(qpdf))
... ... @@ -27,7 +30,8 @@ static void report_errors()
27 30 printf("error: %s\n", qpdf_get_error_full_text(qpdf, e));
28 31 printf(" code: %d\n", qpdf_get_error_code(qpdf, e));
29 32 printf(" file: %s\n", qpdf_get_error_filename(qpdf, e));
30   - printf(" pos : %ld\n", qpdf_get_error_file_position(qpdf, e));
  33 + /* see above comment about %lld */
  34 + printf(" pos : %lld\n", qpdf_get_error_file_position(qpdf, e));
31 35 printf(" text: %s\n", qpdf_get_error_message_detail(qpdf, e));
32 36 }
33 37 else
... ...
qpdf/qtest/qpdf.test
... ... @@ -21,6 +21,7 @@ if ((exists $ENV{'SKIP_TEST_COMPARE_IMAGES'}) &&
21 21 {
22 22 $compare_images = 0;
23 23 }
  24 +my $large_file_test_path = $ENV{'LARGE_FILE_TEST_PATH'} || undef;
24 25  
25 26 my $have_acroread = 0;
26 27  
... ... @@ -1447,8 +1448,114 @@ for (my $n = 1; $n <= 2; ++$n)
1447 1448 }
1448 1449  
1449 1450 show_ntests();
  1451 +# ----------
  1452 +$td->notify("--- Large File Tests ---");
  1453 +my $nlarge = 1;
  1454 +if (defined $large_file_test_path)
  1455 +{
  1456 + $nlarge = 2;
  1457 +}
  1458 +else
  1459 +{
  1460 + $td->notify("--- Skipping tests on actual large files ---");
  1461 +}
  1462 +$n_tests += $nlarge * 13;
  1463 +for (my $large = 0; $large < $nlarge; ++$large)
  1464 +{
  1465 + if ($large)
  1466 + {
  1467 + $td->notify("--- Running tests on actual large files ---");
  1468 + }
  1469 + else
  1470 + {
  1471 + $td->notify("--- Running large file tests on small files ---");
  1472 + }
  1473 + my $size = ($large ? "large" : "small");
  1474 + my $file = $large ? "$large_file_test_path/a.pdf" : "a.pdf";
  1475 + $td->runtest("write test file",
  1476 + {$td->COMMAND => "test_large_file write $size $file"},
  1477 + {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
  1478 + $td->NORMALIZE_NEWLINES);
  1479 + $td->runtest("read test file",
  1480 + {$td->COMMAND => "test_large_file read $size $file"},
  1481 + {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
  1482 + $td->NORMALIZE_NEWLINES);
  1483 + $td->runtest("check",
  1484 + {$td->COMMAND => "qpdf --suppress-recovery --check $file",
  1485 + $td->FILTER => "grep -v checking"},
  1486 + {$td->FILE => "large_file-check-normal.out",
  1487 + $td->EXIT_STATUS => 0},
  1488 + $td->NORMALIZE_NEWLINES);
  1489 +
  1490 + for my $ostream (0, 1)
  1491 + {
  1492 + for my $linearize (0, 1)
  1493 + {
  1494 + if (($ostream == 0) && ($linearize == 0))
  1495 + {
  1496 + # Original file has no object streams and is not linearized.
  1497 + next;
  1498 + }
  1499 + my $args = "";
  1500 + my $omode = $ostream ? "generate" : "disable";
  1501 + my $lin = $linearize ? "--linearize" : "";
  1502 + my $newfile = "$file-new";
  1503 +
  1504 + $td->runtest("transform: ostream=$ostream, linearize=$linearize",
  1505 + {$td->COMMAND =>
  1506 + "qpdf --stream-data=preserve" .
  1507 + " --object-streams=$omode" .
  1508 + " $lin $file $newfile"},
  1509 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  1510 + $td->runtest("read test file",
  1511 + {$td->COMMAND =>
  1512 + "test_large_file read $size $newfile"},
  1513 + {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
  1514 + $td->NORMALIZE_NEWLINES);
  1515 + my $check_out =
  1516 + ($linearize
  1517 + ? ($ostream
  1518 + ? "large_file-check-ostream-linearized.out"
  1519 + : "large_file-check-linearized.out")
  1520 + : ($ostream
  1521 + ? "large_file-check-ostream.out"
  1522 + : "large_file-check-normal.out"));
  1523 + $td->runtest("check: ostream=$ostream, linearize=$linearize",
  1524 + {$td->COMMAND =>
  1525 + "qpdf --suppress-recovery --check $newfile",
  1526 + $td->FILTER => "grep -v checking"},
  1527 + {$td->FILE => $check_out, $td->EXIT_STATUS => 0},
  1528 + $td->NORMALIZE_NEWLINES);
  1529 + unlink $newfile;
  1530 + }
  1531 + }
  1532 +
  1533 + # Clobber xref
  1534 + open(F, "+<$file") or die;
  1535 + seek(F, -50, 2);
  1536 + my $pos = tell F;
  1537 + my $buf;
  1538 + read(F, $buf, 50);
  1539 + die unless $buf =~ m/^(.*startxref\n)\d+/s;
  1540 + $pos += length($1);
  1541 + seek(F, $pos, 0) or die;
  1542 + print F "oops" or die;
  1543 + close(F);
1450 1544  
  1545 + my $cmd = +{$td->COMMAND => "test_large_file read $size $file"};
  1546 + if ($large)
  1547 + {
  1548 + $cmd->{$td->FILTER} = "sed -e s,$large_file_test_path/,,";
  1549 + }
  1550 + $td->runtest("reconstruct xref table",
  1551 + $cmd,
  1552 + {$td->FILE => "large_file_xref_reconstruct.out",
  1553 + $td->EXIT_STATUS => 0},
  1554 + $td->NORMALIZE_NEWLINES);
  1555 + unlink $file;
  1556 +}
1451 1557 # ----------
  1558 +
1452 1559 cleanup();
1453 1560  
1454 1561 # See comments at beginning about calculation of number of tests. We
... ...
qpdf/qtest/qpdf/large_file-check-linearized.out 0 โ†’ 100644
  1 +PDF Version: 1.3
  2 +File is not encrypted
  3 +File is linearized
  4 +No syntax or stream encoding errors found; the file may still contain
  5 +errors that qpdf cannot detect
... ...
qpdf/qtest/qpdf/large_file-check-normal.out 0 โ†’ 100644
  1 +PDF Version: 1.3
  2 +File is not encrypted
  3 +File is not linearized
  4 +No syntax or stream encoding errors found; the file may still contain
  5 +errors that qpdf cannot detect
... ...
qpdf/qtest/qpdf/large_file-check-ostream-linearized.out 0 โ†’ 100644
  1 +PDF Version: 1.5
  2 +File is not encrypted
  3 +File is linearized
  4 +No syntax or stream encoding errors found; the file may still contain
  5 +errors that qpdf cannot detect
... ...
qpdf/qtest/qpdf/large_file-check-ostream.out 0 โ†’ 100644
  1 +PDF Version: 1.5
  2 +File is not encrypted
  3 +File is not linearized
  4 +No syntax or stream encoding errors found; the file may still contain
  5 +errors that qpdf cannot detect
... ...
qpdf/qtest/qpdf/large_file.out 0 โ†’ 100644
  1 +page 1 of 200
  2 +page 2 of 200
  3 +page 3 of 200
  4 +page 4 of 200
  5 +page 5 of 200
  6 +page 6 of 200
  7 +page 7 of 200
  8 +page 8 of 200
  9 +page 9 of 200
  10 +page 10 of 200
  11 +page 11 of 200
  12 +page 12 of 200
  13 +page 13 of 200
  14 +page 14 of 200
  15 +page 15 of 200
  16 +page 16 of 200
  17 +page 17 of 200
  18 +page 18 of 200
  19 +page 19 of 200
  20 +page 20 of 200
  21 +page 21 of 200
  22 +page 22 of 200
  23 +page 23 of 200
  24 +page 24 of 200
  25 +page 25 of 200
  26 +page 26 of 200
  27 +page 27 of 200
  28 +page 28 of 200
  29 +page 29 of 200
  30 +page 30 of 200
  31 +page 31 of 200
  32 +page 32 of 200
  33 +page 33 of 200
  34 +page 34 of 200
  35 +page 35 of 200
  36 +page 36 of 200
  37 +page 37 of 200
  38 +page 38 of 200
  39 +page 39 of 200
  40 +page 40 of 200
  41 +page 41 of 200
  42 +page 42 of 200
  43 +page 43 of 200
  44 +page 44 of 200
  45 +page 45 of 200
  46 +page 46 of 200
  47 +page 47 of 200
  48 +page 48 of 200
  49 +page 49 of 200
  50 +page 50 of 200
  51 +page 51 of 200
  52 +page 52 of 200
  53 +page 53 of 200
  54 +page 54 of 200
  55 +page 55 of 200
  56 +page 56 of 200
  57 +page 57 of 200
  58 +page 58 of 200
  59 +page 59 of 200
  60 +page 60 of 200
  61 +page 61 of 200
  62 +page 62 of 200
  63 +page 63 of 200
  64 +page 64 of 200
  65 +page 65 of 200
  66 +page 66 of 200
  67 +page 67 of 200
  68 +page 68 of 200
  69 +page 69 of 200
  70 +page 70 of 200
  71 +page 71 of 200
  72 +page 72 of 200
  73 +page 73 of 200
  74 +page 74 of 200
  75 +page 75 of 200
  76 +page 76 of 200
  77 +page 77 of 200
  78 +page 78 of 200
  79 +page 79 of 200
  80 +page 80 of 200
  81 +page 81 of 200
  82 +page 82 of 200
  83 +page 83 of 200
  84 +page 84 of 200
  85 +page 85 of 200
  86 +page 86 of 200
  87 +page 87 of 200
  88 +page 88 of 200
  89 +page 89 of 200
  90 +page 90 of 200
  91 +page 91 of 200
  92 +page 92 of 200
  93 +page 93 of 200
  94 +page 94 of 200
  95 +page 95 of 200
  96 +page 96 of 200
  97 +page 97 of 200
  98 +page 98 of 200
  99 +page 99 of 200
  100 +page 100 of 200
  101 +page 101 of 200
  102 +page 102 of 200
  103 +page 103 of 200
  104 +page 104 of 200
  105 +page 105 of 200
  106 +page 106 of 200
  107 +page 107 of 200
  108 +page 108 of 200
  109 +page 109 of 200
  110 +page 110 of 200
  111 +page 111 of 200
  112 +page 112 of 200
  113 +page 113 of 200
  114 +page 114 of 200
  115 +page 115 of 200
  116 +page 116 of 200
  117 +page 117 of 200
  118 +page 118 of 200
  119 +page 119 of 200
  120 +page 120 of 200
  121 +page 121 of 200
  122 +page 122 of 200
  123 +page 123 of 200
  124 +page 124 of 200
  125 +page 125 of 200
  126 +page 126 of 200
  127 +page 127 of 200
  128 +page 128 of 200
  129 +page 129 of 200
  130 +page 130 of 200
  131 +page 131 of 200
  132 +page 132 of 200
  133 +page 133 of 200
  134 +page 134 of 200
  135 +page 135 of 200
  136 +page 136 of 200
  137 +page 137 of 200
  138 +page 138 of 200
  139 +page 139 of 200
  140 +page 140 of 200
  141 +page 141 of 200
  142 +page 142 of 200
  143 +page 143 of 200
  144 +page 144 of 200
  145 +page 145 of 200
  146 +page 146 of 200
  147 +page 147 of 200
  148 +page 148 of 200
  149 +page 149 of 200
  150 +page 150 of 200
  151 +page 151 of 200
  152 +page 152 of 200
  153 +page 153 of 200
  154 +page 154 of 200
  155 +page 155 of 200
  156 +page 156 of 200
  157 +page 157 of 200
  158 +page 158 of 200
  159 +page 159 of 200
  160 +page 160 of 200
  161 +page 161 of 200
  162 +page 162 of 200
  163 +page 163 of 200
  164 +page 164 of 200
  165 +page 165 of 200
  166 +page 166 of 200
  167 +page 167 of 200
  168 +page 168 of 200
  169 +page 169 of 200
  170 +page 170 of 200
  171 +page 171 of 200
  172 +page 172 of 200
  173 +page 173 of 200
  174 +page 174 of 200
  175 +page 175 of 200
  176 +page 176 of 200
  177 +page 177 of 200
  178 +page 178 of 200
  179 +page 179 of 200
  180 +page 180 of 200
  181 +page 181 of 200
  182 +page 182 of 200
  183 +page 183 of 200
  184 +page 184 of 200
  185 +page 185 of 200
  186 +page 186 of 200
  187 +page 187 of 200
  188 +page 188 of 200
  189 +page 189 of 200
  190 +page 190 of 200
  191 +page 191 of 200
  192 +page 192 of 200
  193 +page 193 of 200
  194 +page 194 of 200
  195 +page 195 of 200
  196 +page 196 of 200
  197 +page 197 of 200
  198 +page 198 of 200
  199 +page 199 of 200
  200 +page 200 of 200
... ...
qpdf/qtest/qpdf/large_file_xref_reconstruct.out 0 โ†’ 100644
  1 +WARNING: a.pdf: file is damaged
  2 +WARNING: a.pdf: can't find startxref
  3 +WARNING: a.pdf: Attempting to reconstruct cross-reference table
  4 +page 1 of 200
  5 +page 2 of 200
  6 +page 3 of 200
  7 +page 4 of 200
  8 +page 5 of 200
  9 +page 6 of 200
  10 +page 7 of 200
  11 +page 8 of 200
  12 +page 9 of 200
  13 +page 10 of 200
  14 +page 11 of 200
  15 +page 12 of 200
  16 +page 13 of 200
  17 +page 14 of 200
  18 +page 15 of 200
  19 +page 16 of 200
  20 +page 17 of 200
  21 +page 18 of 200
  22 +page 19 of 200
  23 +page 20 of 200
  24 +page 21 of 200
  25 +page 22 of 200
  26 +page 23 of 200
  27 +page 24 of 200
  28 +page 25 of 200
  29 +page 26 of 200
  30 +page 27 of 200
  31 +page 28 of 200
  32 +page 29 of 200
  33 +page 30 of 200
  34 +page 31 of 200
  35 +page 32 of 200
  36 +page 33 of 200
  37 +page 34 of 200
  38 +page 35 of 200
  39 +page 36 of 200
  40 +page 37 of 200
  41 +page 38 of 200
  42 +page 39 of 200
  43 +page 40 of 200
  44 +page 41 of 200
  45 +page 42 of 200
  46 +page 43 of 200
  47 +page 44 of 200
  48 +page 45 of 200
  49 +page 46 of 200
  50 +page 47 of 200
  51 +page 48 of 200
  52 +page 49 of 200
  53 +page 50 of 200
  54 +page 51 of 200
  55 +page 52 of 200
  56 +page 53 of 200
  57 +page 54 of 200
  58 +page 55 of 200
  59 +page 56 of 200
  60 +page 57 of 200
  61 +page 58 of 200
  62 +page 59 of 200
  63 +page 60 of 200
  64 +page 61 of 200
  65 +page 62 of 200
  66 +page 63 of 200
  67 +page 64 of 200
  68 +page 65 of 200
  69 +page 66 of 200
  70 +page 67 of 200
  71 +page 68 of 200
  72 +page 69 of 200
  73 +page 70 of 200
  74 +page 71 of 200
  75 +page 72 of 200
  76 +page 73 of 200
  77 +page 74 of 200
  78 +page 75 of 200
  79 +page 76 of 200
  80 +page 77 of 200
  81 +page 78 of 200
  82 +page 79 of 200
  83 +page 80 of 200
  84 +page 81 of 200
  85 +page 82 of 200
  86 +page 83 of 200
  87 +page 84 of 200
  88 +page 85 of 200
  89 +page 86 of 200
  90 +page 87 of 200
  91 +page 88 of 200
  92 +page 89 of 200
  93 +page 90 of 200
  94 +page 91 of 200
  95 +page 92 of 200
  96 +page 93 of 200
  97 +page 94 of 200
  98 +page 95 of 200
  99 +page 96 of 200
  100 +page 97 of 200
  101 +page 98 of 200
  102 +page 99 of 200
  103 +page 100 of 200
  104 +page 101 of 200
  105 +page 102 of 200
  106 +page 103 of 200
  107 +page 104 of 200
  108 +page 105 of 200
  109 +page 106 of 200
  110 +page 107 of 200
  111 +page 108 of 200
  112 +page 109 of 200
  113 +page 110 of 200
  114 +page 111 of 200
  115 +page 112 of 200
  116 +page 113 of 200
  117 +page 114 of 200
  118 +page 115 of 200
  119 +page 116 of 200
  120 +page 117 of 200
  121 +page 118 of 200
  122 +page 119 of 200
  123 +page 120 of 200
  124 +page 121 of 200
  125 +page 122 of 200
  126 +page 123 of 200
  127 +page 124 of 200
  128 +page 125 of 200
  129 +page 126 of 200
  130 +page 127 of 200
  131 +page 128 of 200
  132 +page 129 of 200
  133 +page 130 of 200
  134 +page 131 of 200
  135 +page 132 of 200
  136 +page 133 of 200
  137 +page 134 of 200
  138 +page 135 of 200
  139 +page 136 of 200
  140 +page 137 of 200
  141 +page 138 of 200
  142 +page 139 of 200
  143 +page 140 of 200
  144 +page 141 of 200
  145 +page 142 of 200
  146 +page 143 of 200
  147 +page 144 of 200
  148 +page 145 of 200
  149 +page 146 of 200
  150 +page 147 of 200
  151 +page 148 of 200
  152 +page 149 of 200
  153 +page 150 of 200
  154 +page 151 of 200
  155 +page 152 of 200
  156 +page 153 of 200
  157 +page 154 of 200
  158 +page 155 of 200
  159 +page 156 of 200
  160 +page 157 of 200
  161 +page 158 of 200
  162 +page 159 of 200
  163 +page 160 of 200
  164 +page 161 of 200
  165 +page 162 of 200
  166 +page 163 of 200
  167 +page 164 of 200
  168 +page 165 of 200
  169 +page 166 of 200
  170 +page 167 of 200
  171 +page 168 of 200
  172 +page 169 of 200
  173 +page 170 of 200
  174 +page 171 of 200
  175 +page 172 of 200
  176 +page 173 of 200
  177 +page 174 of 200
  178 +page 175 of 200
  179 +page 176 of 200
  180 +page 177 of 200
  181 +page 178 of 200
  182 +page 179 of 200
  183 +page 180 of 200
  184 +page 181 of 200
  185 +page 182 of 200
  186 +page 183 of 200
  187 +page 184 of 200
  188 +page 185 of 200
  189 +page 186 of 200
  190 +page 187 of 200
  191 +page 188 of 200
  192 +page 189 of 200
  193 +page 190 of 200
  194 +page 191 of 200
  195 +page 192 of 200
  196 +page 193 of 200
  197 +page 194 of 200
  198 +page 195 of 200
  199 +page 196 of 200
  200 +page 197 of 200
  201 +page 198 of 200
  202 +page 199 of 200
  203 +page 200 of 200
... ...
qpdf/test_large_file.cc 0 โ†’ 100644
  1 +#include <qpdf/QPDF.hh>
  2 +#include <qpdf/QPDFWriter.hh>
  3 +#include <qpdf/QPDFObjectHandle.hh>
  4 +#include <qpdf/QUtil.hh>
  5 +#include <iostream>
  6 +#include <string.h>
  7 +#include <stdlib.h>
  8 +#include <assert.h>
  9 +
  10 +// Run "test_large_file write small a.pdf" to get a PDF file that you
  11 +// can look at in a reader.
  12 +
  13 +// This program reads and writes specially crafted files for testing
  14 +// large file support. In write mode, write a file of npages pages
  15 +// where each page contains unique text and a unique image. The image
  16 +// is a binary representation of the page number. The image contains
  17 +// horizontal stripes with light stripes representing 1, dark stripes
  18 +// representing 0, and the high bit on top. In read mode, read the
  19 +// file back checking to make sure all the image data and page
  20 +// contents are as expected.
  21 +
  22 +// Running this in small mode produces a small file that is easy to
  23 +// look at in any viewer. Since there is no question about proper
  24 +// functionality for small files, writing and reading the small file
  25 +// allows the qpdf library to test this test program. Writing and
  26 +// reading the large file then allows us to verify large file support
  27 +// with confidence.
  28 +
  29 +static char const* whoami = 0;
  30 +
// Height should be a multiple of 10
static int const nstripes = 10;
static int const stripesize_large = 500;
static int const stripesize_small = 5;
static int const npages = 200;

// Image geometry. These start at 0 and are assigned by set_parameters(),
// which main calls before any PDF is created or checked.
int stripesize = 0;
int width = 0;
int height = 0;
// Scratch buffer: allocated on first use by
// ImageProvider::provideStreamData and released at the end of main.
static unsigned char* buf = 0;

// Return the gray level of horizontal stripe "row" in the image for
// page number n. Stripe 0 is the top stripe and carries the
// most-significant of the nstripes bits of n; a set bit yields the
// light value 0xc0 and a clear bit yields the dark value 0x40.
//
// A row outside [0, nstripes) has no corresponding bit. Shifting by a
// negative count is undefined behavior, so such rows are reported as
// dark instead of being shifted.
static inline unsigned char get_pixel_color(int n, int row)
{
    if ((row < 0) || (row >= nstripes))
    {
        return static_cast<unsigned char>('\x40');
    }
    return (n & (1 << (nstripes - 1 - row))) ? '\xc0' : '\x40';
}
  47 +
  48 +class ImageChecker: public Pipeline
  49 +{
  50 + public:
  51 + ImageChecker(int n);
  52 + virtual ~ImageChecker();
  53 + virtual void write(unsigned char* data, size_t len);
  54 + virtual void finish();
  55 +
  56 + private:
  57 + int n;
  58 + size_t offset;
  59 + bool okay;
  60 +};
  61 +
  62 +ImageChecker::ImageChecker(int n) :
  63 + Pipeline("image checker", 0),
  64 + n(n),
  65 + offset(0),
  66 + okay(true)
  67 +{
  68 +}
  69 +
  70 +ImageChecker::~ImageChecker()
  71 +{
  72 +}
  73 +
  74 +void
  75 +ImageChecker::write(unsigned char* data, size_t len)
  76 +{
  77 + for (size_t i = 0; i < len; ++i)
  78 + {
  79 + int y = (this->offset + i) / width / stripesize;
  80 + unsigned char color = get_pixel_color(n, y);
  81 + if (data[i] != color)
  82 + {
  83 + okay = false;
  84 + }
  85 + }
  86 + this->offset += len;
  87 +}
  88 +
  89 +void
  90 +ImageChecker::finish()
  91 +{
  92 + if (! okay)
  93 + {
  94 + std::cout << "errors found checking image data for page " << n
  95 + << std::endl;
  96 + }
  97 +}
  98 +
  99 +class ImageProvider: public QPDFObjectHandle::StreamDataProvider
  100 +{
  101 + public:
  102 + ImageProvider(int n);
  103 + virtual ~ImageProvider();
  104 + virtual void provideStreamData(int objid, int generation,
  105 + Pipeline* pipeline);
  106 + size_t getLength() const;
  107 +
  108 + private:
  109 + int n;
  110 +};
  111 +
  112 +ImageProvider::ImageProvider(int n) :
  113 + n(n)
  114 +{
  115 +}
  116 +
  117 +ImageProvider::~ImageProvider()
  118 +{
  119 +}
  120 +
  121 +void
  122 +ImageProvider::provideStreamData(int objid, int generation,
  123 + Pipeline* pipeline)
  124 +{
  125 + if (buf == 0)
  126 + {
  127 + buf = new unsigned char[width * stripesize];
  128 + }
  129 + std::cout << "page " << n << " of " << npages << std::endl;
  130 + for (int y = 0; y < nstripes; ++y)
  131 + {
  132 + unsigned char color = get_pixel_color(n, y);
  133 + memset(buf, (int) color, width * stripesize);
  134 + pipeline->write(buf, width * stripesize);
  135 + }
  136 + pipeline->finish();
  137 +}
  138 +
  139 +size_t
  140 +ImageProvider::getLength() const
  141 +{
  142 + return width * height;
  143 +}
  144 +
  145 +void usage()
  146 +{
  147 + std::cerr << "Usage: " << whoami << " {read|write} {large|small} outfile"
  148 + << std::endl;
  149 + exit(2);
  150 +}
  151 +
  152 +static void set_parameters(bool large)
  153 +{
  154 + stripesize = large ? stripesize_large : stripesize_small;
  155 + height = nstripes * stripesize;
  156 + width = height;
  157 +}
  158 +
  159 +std::string generate_page_contents(int pageno)
  160 +{
  161 + std::string contents =
  162 + "BT /F1 24 Tf 72 720 Td (page " + QUtil::int_to_string(pageno) +
  163 + ") Tj ET\n"
  164 + "q 468 0 0 468 72 72 cm /Im1 Do Q\n";
  165 + return contents;
  166 +}
  167 +
  168 +static QPDFObjectHandle create_page_contents(QPDF& pdf, int pageno)
  169 +{
  170 + std::string contents = generate_page_contents(pageno);
  171 + PointerHolder<Buffer> b = new Buffer(contents.length());
  172 + unsigned char* bp = b->getBuffer();
  173 + memcpy(bp, (char*)contents.c_str(), contents.length());
  174 + return QPDFObjectHandle::newStream(&pdf, b);
  175 +}
  176 +
  177 +QPDFObjectHandle newName(std::string const& name)
  178 +{
  179 + return QPDFObjectHandle::newName(name);
  180 +}
  181 +
  182 +QPDFObjectHandle newInteger(int val)
  183 +{
  184 + return QPDFObjectHandle::newInteger(val);
  185 +}
  186 +
  187 +static void create_pdf(char const* filename)
  188 +{
  189 + QPDF pdf;
  190 +
  191 + pdf.emptyPDF();
  192 +
  193 + QPDFObjectHandle font = pdf.makeIndirectObject(
  194 + QPDFObjectHandle::newDictionary());
  195 + font.replaceKey("/Type", newName("/Font"));
  196 + font.replaceKey("/Subtype", newName("/Type1"));
  197 + font.replaceKey("/Name", newName("/F1"));
  198 + font.replaceKey("/BaseFont", newName("/Helvetica"));
  199 + font.replaceKey("/Encoding", newName("/WinAnsiEncoding"));
  200 +
  201 + QPDFObjectHandle procset =
  202 + pdf.makeIndirectObject(QPDFObjectHandle::newArray());
  203 + procset.appendItem(newName("/PDF"));
  204 + procset.appendItem(newName("/Text"));
  205 + procset.appendItem(newName("/ImageC"));
  206 +
  207 + QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
  208 + rfont.replaceKey("/F1", font);
  209 +
  210 + QPDFObjectHandle mediabox = QPDFObjectHandle::newArray();
  211 + mediabox.appendItem(newInteger(0));
  212 + mediabox.appendItem(newInteger(0));
  213 + mediabox.appendItem(newInteger(612));
  214 + mediabox.appendItem(newInteger(792));
  215 +
  216 + for (int pageno = 1; pageno <= npages; ++pageno)
  217 + {
  218 + QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf);
  219 + QPDFObjectHandle image_dict = image.getDict();
  220 + image_dict.replaceKey("/Type", newName("/XObject"));
  221 + image_dict.replaceKey("/Subtype", newName("/Image"));
  222 + image_dict.replaceKey("/ColorSpace", newName("/DeviceGray"));
  223 + image_dict.replaceKey("/BitsPerComponent", newInteger(8));
  224 + image_dict.replaceKey("/Width", newInteger(width));
  225 + image_dict.replaceKey("/Height", newInteger(height));
  226 + ImageProvider* p = new ImageProvider(pageno);
  227 + PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p);
  228 + image.replaceStreamData(provider,
  229 + QPDFObjectHandle::newNull(),
  230 + QPDFObjectHandle::newNull(),
  231 + p->getLength());
  232 +
  233 + QPDFObjectHandle xobject = QPDFObjectHandle::newDictionary();
  234 + xobject.replaceKey("/Im1", image);
  235 +
  236 + QPDFObjectHandle resources = QPDFObjectHandle::newDictionary();
  237 + resources.replaceKey("/ProcSet", procset);
  238 + resources.replaceKey("/Font", rfont);
  239 + resources.replaceKey("/XObject", xobject);
  240 +
  241 + QPDFObjectHandle contents = create_page_contents(pdf, pageno);
  242 +
  243 + QPDFObjectHandle page = pdf.makeIndirectObject(
  244 + QPDFObjectHandle::newDictionary());
  245 + page.replaceKey("/Type", newName("/Page"));
  246 + page.replaceKey("/MediaBox", mediabox);
  247 + page.replaceKey("/Contents", contents);
  248 + page.replaceKey("/Resources", resources);
  249 +
  250 + pdf.addPage(page, false);
  251 + }
  252 +
  253 + QPDFWriter w(pdf, filename);
  254 + w.setStaticID(true); // for testing only
  255 + w.setStreamDataMode(qpdf_s_preserve);
  256 + w.setObjectStreamMode(qpdf_o_disable);
  257 + w.write();
  258 +}
  259 +
  260 +static void check_page_contents(int pageno, QPDFObjectHandle page)
  261 +{
  262 + PointerHolder<Buffer> buf =
  263 + page.getKey("/Contents").getStreamData();
  264 + std::string actual_contents =
  265 + std::string((char *)(buf->getBuffer()), buf->getSize());
  266 + std::string expected_contents = generate_page_contents(pageno);
  267 + if (expected_contents != actual_contents)
  268 + {
  269 + std::cout << "page contents wrong for page " << pageno << std::endl
  270 + << "ACTUAL: " << actual_contents
  271 + << "EXPECTED: " << expected_contents
  272 + << "----\n";
  273 + }
  274 +}
  275 +
  276 +static void check_image(int pageno, QPDFObjectHandle page)
  277 +{
  278 + QPDFObjectHandle image =
  279 + page.getKey("/Resources").getKey("/XObject").getKey("/Im1");
  280 + ImageChecker ic(pageno);
  281 + image.pipeStreamData(&ic, true, false, false);
  282 +}
  283 +
  284 +static void check_pdf(char const* filename)
  285 +{
  286 + QPDF pdf;
  287 + pdf.processFile(filename);
  288 + std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
  289 + assert(pages.size() == (size_t)npages);
  290 + for (int i = 0; i < npages; ++i)
  291 + {
  292 + int pageno = i + 1;
  293 + std::cout << "page " << pageno << " of " << npages << std::endl;
  294 + check_page_contents(pageno, pages[i]);
  295 + check_image(pageno, pages[i]);
  296 + }
  297 +}
  298 +
  299 +int main(int argc, char* argv[])
  300 +{
  301 + whoami = QUtil::getWhoami(argv[0]);
  302 + QUtil::setLineBuf(stdout);
  303 +
  304 + // For libtool's sake....
  305 + if (strncmp(whoami, "lt-", 3) == 0)
  306 + {
  307 + whoami += 3;
  308 + }
  309 + if (argc != 4)
  310 + {
  311 + usage();
  312 + }
  313 + char const* operation = argv[1];
  314 + char const* size = argv[2];
  315 + char const* filename = argv[3];
  316 +
  317 + bool op_write = false;
  318 + bool size_large = false;
  319 +
  320 + if (strcmp(operation, "write") == 0)
  321 + {
  322 + op_write = true;
  323 + }
  324 + else if (strcmp(operation, "read") == 0)
  325 + {
  326 + op_write = false;
  327 + }
  328 + else
  329 + {
  330 + usage();
  331 + }
  332 +
  333 + if (strcmp(size, "large") == 0)
  334 + {
  335 + size_large = true;
  336 + }
  337 + else if (strcmp(size, "small") == 0)
  338 + {
  339 + size_large = false;
  340 + }
  341 + else
  342 + {
  343 + usage();
  344 + }
  345 +
  346 + set_parameters(size_large);
  347 +
  348 + try
  349 + {
  350 + if (op_write)
  351 + {
  352 + create_pdf(filename);
  353 + }
  354 + else
  355 + {
  356 + check_pdf(filename);
  357 + }
  358 + }
  359 + catch (std::exception& e)
  360 + {
  361 + std::cerr << e.what() << std::endl;
  362 + exit(2);
  363 + }
  364 +
  365 + delete [] buf;
  366 +
  367 + return 0;
  368 +}
... ...