Commit a0768e419064b66ea6eb3e06a4398806b24311e8

Authored by Jay Berkenbilt
1 parent 9eb8c915

Add QPDF::emptyPDF() and pdf_from_scratch test code

ChangeLog
1 1 2012-06-21 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * Add QPDF::emptyPDF() to create an empty QPDF object suitable for
  4 + adding pages and other objects to. pdf_from_scratch.cc is test
  5 + code that exercises it.
  6 +
3 7 * make/libtool.mk: Place user-specified CPPFLAGS and LDFLAGS later
4 8 in the compilation so that if a user installs things in a
5 9 non-standard place that they have to tell the build about, earlier
... ...
... ... @@ -13,6 +13,35 @@ Next
13 13 - update README-windows.txt docs to indicate that MSVC 2010 is the
14 14 supported version and to update the information about mingw.
15 15  
  16 + * Testing for files > 4GB
  17 +
  18 + - Create a PDF from scratch. Each page has a page number as text
  19 + and an image. The image can be 5000x5000 pixels using 8-bit
  20 + gray scale. It will be divided into 10 stripes of 500 pixels
  21 + each. The left and right 500 pixels of each stripe will
  22 + alternate black and white. The remaining part of the image will
  23 + have white stripes indicating 1 and black stripes indicating 0
  24 + with the most-significant bit on top to indicate the page
  25 + number. In this way, every page will be unique and will consume
  26 + approximately 25 megabytes. Creating 200 pages like this will
  27 + make a file that is 5 GB.
  28 +
  29 + - The file will have to have object streams since a regular xref
  30 + table won't be able to support offsets that large.
  31 +
  32 + - A separate test program can create this file and do various
  33 + manipulations on it. This can be enabled with an environment
  34 + variable controlled by configure in much the same way image
  35 + comparison tests are enabled now. The argument to
  36 + --enable-large-file-test should be a path that has enough disk
  37 + space to do the tests, probably enough space for two copies of
  38 + the file. The test program should also have an interactive mode
  39 + so we can generate the large file and then look at it with a
  40 + PDF viewer like Adobe Reader.
  41 +
  42 + * Consider adding an example that uses the page APIs, or update the
  43 + documentation to refer the user to the test suite.
  44 +
16 45 Soon
17 46 ====
18 47  
... ... @@ -24,8 +53,6 @@ Soon
24 53 * See if I can support the new encryption formats mentioned in the
25 54 open bug on sourceforge. Check other sourceforge bugs.
26 55  
27   - * Would be nice to confirm that it's working for > 4GB files.
28   -
29 56 * Splitting/merging concepts
30 57  
31 58 newPDF() could create a PDF with just a trailer, no pages, and a
... ...
include/qpdf/QPDF.hh
... ... @@ -69,6 +69,16 @@ class QPDF
69 69 char const* buf, size_t length,
70 70 char const* password = 0);
71 71  
  72 + // Create a QPDF object for an empty PDF. This PDF has no pages
  73 + // or objects other than a minimal trailer, a document catalog,
  74 + // and a /Pages tree containing zero pages. Pages and other
  75 + // objects can be added to the file in the normal way, and the
  76 + // trailer and document catalog can be mutated. Calling this
  77 + // method is equivalent to calling processFile on a PDF file
  78 + // with exactly that minimal content.
  79 + QPDF_DLL
  80 + void emptyPDF();
  81 +
72 82 // Parameter settings
73 83  
74 84 // By default, warning messages are issued to std::cerr and output
... ...
libqpdf/QPDF.cc
... ... @@ -17,6 +17,24 @@
17 17  
18 18 std::string QPDF::qpdf_version = "2.3.1";
19 19  
  20 +static char const* EMPTY_PDF =
  21 + "%PDF-1.3\n"
  22 + "1 0 obj\n"
  23 + "<< /Type /Catalog /Pages 2 0 R >>\n"
  24 + "endobj\n"
  25 + "2 0 obj\n"
  26 + "<< /Type /Pages /Kids [] /Count 0 >>\n"
  27 + "endobj\n"
  28 + "xref\n"
  29 + "0 3\n"
  30 + "0000000000 65535 f \n"
  31 + "0000000009 00000 n \n"
  32 + "0000000058 00000 n \n"
  33 + "trailer << /Size 3 /Root 1 0 R >>\n"
  34 + "startxref\n"
  35 + "110\n"
  36 + "%%EOF\n";
  37 +
20 38 void
21 39 QPDF::InputSource::setLastOffset(qpdf_offset_t offset)
22 40 {
... ... @@ -350,6 +368,12 @@ QPDF::processMemoryFile(char const* description,
350 368 }
351 369  
352 370 void
  371 +QPDF::emptyPDF()
  372 +{
  373 + processMemoryFile("empty file", EMPTY_PDF, strlen(EMPTY_PDF));
  374 +}
  375 +
  376 +void
353 377 QPDF::setIgnoreXRefStreams(bool val)
354 378 {
355 379 this->ignore_xref_streams = val;
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -365,7 +365,7 @@ QPDF::optimizePagesTreeInternal(
365 365 throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
366 366 this->last_object_description,
367 367 this->file->getLastOffset(),
368   - "invalid Type in page tree");
  368 + "invalid Type " + type + " in page tree");
369 369 }
370 370 }
371 371  
... ...
libqpdf/QPDF_pages.cc
... ... @@ -73,7 +73,7 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
73 73 throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
74 74 this->last_object_description,
75 75 this->file->getLastOffset(),
76   - ": invalid Type in page tree");
  76 + "invalid Type " + type + " in page tree");
77 77 }
78 78 }
79 79  
... ...
qpdf/build.mk
1   -BINS_qpdf = qpdf test_driver
  1 +BINS_qpdf = qpdf test_driver pdf_from_scratch
2 2 CBINS_qpdf = qpdf-ctest
3 3  
4 4 TARGETS_qpdf = $(foreach B,$(BINS_qpdf) $(CBINS_qpdf),qpdf/$(OUTPUT_DIR)/$(call binname,$(B)))
... ...
qpdf/pdf_from_scratch.cc 0 → 100644
  1 +#include <qpdf/QPDF.hh>
  2 +
  3 +#include <qpdf/QUtil.hh>
  4 +#include <qpdf/QTC.hh>
  5 +#include <qpdf/QPDFWriter.hh>
  6 +#include <qpdf/QPDFObjectHandle.hh>
  7 +#include <iostream>
  8 +#include <stdio.h>
  9 +#include <string.h>
  10 +#include <stdlib.h>
  11 +#include <assert.h>
  12 +
  13 +static char const* whoami = 0;
  14 +
  15 +void usage()
  16 +{
  17 + std::cerr << "Usage: " << whoami << " n" << std::endl;
  18 + exit(2);
  19 +}
  20 +
  21 +static QPDFObjectHandle createPageContents(QPDF& pdf, std::string const& text)
  22 +{
  23 + std::string contents = "BT /F1 15 Tf 72 720 Td (" + text + ") Tj ET\n";
  24 + PointerHolder<Buffer> b = new Buffer(contents.length());
  25 + unsigned char* bp = b->getBuffer();
  26 + memcpy(bp, (char*)contents.c_str(), contents.length());
  27 + return QPDFObjectHandle::newStream(&pdf, b);
  28 +}
  29 +
  30 +QPDFObjectHandle newName(std::string const& name)
  31 +{
  32 + return QPDFObjectHandle::newName(name);
  33 +}
  34 +
  35 +void runtest(int n)
  36 +{
  37 + QPDF pdf;
  38 + pdf.emptyPDF();
  39 + if (n == 0)
  40 + {
  41 + // Create a minimal PDF from scratch.
  42 +
  43 + std::map<std::string, QPDFObjectHandle> keys;
  44 + std::vector<QPDFObjectHandle> items;
  45 +
  46 + keys.clear();
  47 + keys["/Type"] = newName("/Font");
  48 + keys["/Subtype"] = newName("/Type1");
  49 + keys["/Name"] = newName("/F1");
  50 + keys["/BaseFont"] = newName("/Helvetica");
  51 + keys["/Encoding"] = newName("/WinAnsiEncoding");
  52 + QPDFObjectHandle font = pdf.makeIndirectObject(
  53 + QPDFObjectHandle::newDictionary(keys));
  54 +
  55 + items.clear();
  56 + items.push_back(newName("/PDF"));
  57 + items.push_back(newName("/Text"));
  58 + QPDFObjectHandle procset = pdf.makeIndirectObject(
  59 + QPDFObjectHandle::newArray(items));
  60 +
  61 + QPDFObjectHandle contents = createPageContents(pdf, "First Page");
  62 +
  63 + items.clear();
  64 + items.push_back(QPDFObjectHandle::newInteger(0));
  65 + items.push_back(QPDFObjectHandle::newInteger(0));
  66 + items.push_back(QPDFObjectHandle::newInteger(612));
  67 + items.push_back(QPDFObjectHandle::newInteger(792));
  68 + QPDFObjectHandle mediabox = QPDFObjectHandle::newArray(items);
  69 +
  70 + keys.clear();
  71 + keys["/F1"] = font;
  72 + QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(keys);
  73 +
  74 + keys.clear();
  75 + keys["/ProcSet"] = procset;
  76 + keys["/Font"] = rfont;
  77 + QPDFObjectHandle resources = QPDFObjectHandle::newDictionary(keys);
  78 +
  79 + keys.clear();
  80 + keys["/Type"] = newName("/Page");
  81 + keys["/MediaBox"] = mediabox;
  82 + keys["/Contents"] = contents;
  83 + keys["/Resources"] = resources;
  84 + QPDFObjectHandle page = pdf.makeIndirectObject(
  85 + QPDFObjectHandle::newDictionary(keys));
  86 +
  87 + pdf.addPage(page, true);
  88 +
  89 + QPDFWriter w(pdf, "a.pdf");
  90 + w.setStaticID(true);
  91 + w.setStreamDataMode(qpdf_s_preserve);
  92 + w.write();
  93 + }
  94 + else
  95 + {
  96 + throw std::runtime_error(std::string("invalid test ") +
  97 + QUtil::int_to_string(n));
  98 + }
  99 +
  100 + std::cout << "test " << n << " done" << std::endl;
  101 +}
  102 +
  103 +int main(int argc, char* argv[])
  104 +{
  105 + QUtil::setLineBuf(stdout);
  106 + if ((whoami = strrchr(argv[0], '/')) == NULL)
  107 + {
  108 + whoami = argv[0];
  109 + }
  110 + else
  111 + {
  112 + ++whoami;
  113 + }
  114 + // For libtool's sake....
  115 + if (strncmp(whoami, "lt-", 3) == 0)
  116 + {
  117 + whoami += 3;
  118 + }
  119 +
  120 + if (argc != 2)
  121 + {
  122 + usage();
  123 + }
  124 +
  125 + try
  126 + {
  127 + int n = atoi(argv[1]);
  128 + runtest(n);
  129 + }
  130 + catch (std::exception& e)
  131 + {
  132 + std::cerr << e.what() << std::endl;
  133 + exit(2);
  134 + }
  135 +
  136 + return 0;
  137 +}
... ...
qpdf/qtest/qpdf.test
... ... @@ -352,6 +352,17 @@ $td->runtest("shallow copy a stream",
352 352  
353 353 show_ntests();
354 354 # ----------
  355 +$td->notify("--- PDF From Scratch ---");
  356 +$n_tests += 2;
  357 +
  358 +$td->runtest("basic qpdf from scratch",
  359 + {$td->COMMAND => "pdf_from_scratch 0"},
  360 + {$td->STRING => "test 0 done\n", $td->EXIT_STATUS => 0},
  361 + $td->NORMALIZE_NEWLINES);
  362 +$td->runtest("check output",
  363 + {$td->FILE => "a.pdf"},
  364 + {$td->FILE => "from-scratch-0.pdf"});
  365 +# ----------
355 366 $td->notify("--- Error Condition Tests ---");
356 367 # $n_tests incremented after initialization of badfiles below.
357 368  
... ...
qpdf/qtest/qpdf/from-scratch-0.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +1 0 obj
  4 +<< /Pages 2 0 R /Type /Catalog >>
  5 +endobj
  6 +2 0 obj
  7 +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
  8 +endobj
  9 +3 0 obj
  10 +<< /Contents 4 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 5 0 R >> /ProcSet 6 0 R >> /Type /Page >>
  11 +endobj
  12 +4 0 obj
  13 +<< /Length 42 >>
  14 +stream
  15 +BT /F1 15 Tf 72 720 Td (First Page) Tj ET
  16 +endstream
  17 +endobj
  18 +5 0 obj
  19 +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
  20 +endobj
  21 +6 0 obj
  22 +[ /PDF /Text ]
  23 +endobj
  24 +xref
  25 +0 7
  26 +0000000000 65535 f
  27 +0000000015 00000 n
  28 +0000000064 00000 n
  29 +0000000123 00000 n
  30 +0000000266 00000 n
  31 +0000000357 00000 n
  32 +0000000464 00000 n
  33 +trailer << /Root 1 0 R /Size 7 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
  34 +startxref
  35 +494
  36 +%%EOF
... ...