Commit 0b2e9cb16886d422e6645a5d65c74a340ae6deff

Authored by Jay Berkenbilt
1 parent 9f8aba1d

Example: fast split into single pages

This is faster than using qpdf --pages to do it.
ChangeLog
1 1 2014-06-07 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * New example program: pdf-split-pages: efficiently split PDF
  4 + files into individual pages.
  5 +
3 6 * Bug fix: don't fail on files that contain streams where /Filter
4 7 or /DecodeParms references a stream. Before, qpdf would try to
5 8 convert these to direct objects, which would fail because of the
... ...
examples/build.mk
... ... @@ -5,7 +5,8 @@ BINS_examples = \
5 5 pdf-double-page-size \
6 6 pdf-invert-images \
7 7 pdf-create \
8   - pdf-parse-content
  8 + pdf-parse-content \
  9 + pdf-split-pages
9 10 CBINS_examples = pdf-linearize
10 11  
11 12 TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B)))
... ...
examples/pdf-split-pages.cc 0 → 100644
  1 +//
  2 +// This is a stand-alone example of splitting a PDF into individual
  3 +// pages. It is much faster than using the qpdf command-line tool to
  4 +// split into separate files per page.
  5 +//
  6 +
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFWriter.hh>
#include <qpdf/QUtil.hh>

#include <cstdlib>
#include <cstring>
#include <exception>
#include <iostream>
#include <string>
#include <vector>
  13 +
  14 +static bool static_id = false;
  15 +
  16 +static void process(char const* whoami,
  17 + char const* infile,
  18 + std::string outprefix)
  19 +{
  20 + QPDF inpdf;
  21 + inpdf.processFile(infile);
  22 + std::vector<QPDFObjectHandle> const& pages = inpdf.getAllPages();
  23 + int pageno_len = QUtil::int_to_string(pages.size()).length();
  24 + int pageno = 0;
  25 + for (std::vector<QPDFObjectHandle>::const_iterator iter = pages.begin();
  26 + iter != pages.end(); ++iter)
  27 + {
  28 + QPDFObjectHandle page = *iter;
  29 + std::string outfile =
  30 + outprefix + QUtil::int_to_string(++pageno, pageno_len) + ".pdf";
  31 + QPDF outpdf;
  32 + outpdf.emptyPDF();
  33 + outpdf.addPage(page, false);
  34 + QPDFWriter outpdfw(outpdf, outfile.c_str());
  35 + if (static_id)
  36 + {
  37 + // For the test suite, uncompress streams and use static
  38 + // IDs.
  39 + outpdfw.setStaticID(true);
  40 + outpdfw.setStreamDataMode(qpdf_s_uncompress);
  41 + }
  42 + outpdfw.write();
  43 + }
  44 +}
  45 +
  46 +int main(int argc, char* argv[])
  47 +{
  48 + char* whoami = QUtil::getWhoami(argv[0]);
  49 +
  50 + // For libtool's sake....
  51 + if (strncmp(whoami, "lt-", 3) == 0)
  52 + {
  53 + whoami += 3;
  54 + }
  55 + // For test suite
  56 + if ((argc > 1) && (strcmp(argv[1], " --static-id") == 0))
  57 + {
  58 + static_id = true;
  59 + --argc;
  60 + ++argv;
  61 + }
  62 +
  63 + if (argc != 3)
  64 + {
  65 + std::cerr << "Usage: " << whoami << " infile outprefix" << std::endl;
  66 + }
  67 + try
  68 + {
  69 + process(whoami, argv[1], argv[2]);
  70 + }
  71 + catch (std::exception e)
  72 + {
  73 + std::cerr << whoami << ": exception: " << e.what() << std::endl;
  74 + return 2;
  75 + }
  76 + return 0;
  77 +}
... ...
examples/qtest/pdf-split-pages.test 0 → 100644
  1 +#!/usr/bin/env perl
  2 +require 5.008;
  3 +use warnings;
  4 +use strict;
  5 +
# Run from the directory that holds the input and expected PDF files.
chdir("pdf-split-pages");

require TestDriver;

my $td = TestDriver->new('pdf-split-pages');

# Remove output files left behind by an earlier run.
cleanup();

# The option is passed with a leading space inside the quotes; that is
# the exact string the example program compares its first argument to.
$td->runtest("split",
             {$td->COMMAND => "pdf-split-pages ' --static-id' in.pdf out"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});

# Compare each generated page file with its expected counterpart.
foreach my $n (1, 2)
{
    $td->runtest("check page ${n}",
                 {$td->FILE => "out${n}.pdf"},
                 {$td->FILE => "exp${n}.pdf"});
}

cleanup();

# One split run plus two file comparisons.
$td->report(3);
  29 +
# Delete every file in the current directory whose name matches
# out?.pdf, i.e. the per-page output files produced by the split test.
sub cleanup
{
    my @stale = glob("out?.pdf");
    unlink(@stale);
}
... ...
examples/qtest/pdf-split-pages/exp1.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +1 0 obj
  4 +<< /Pages 2 0 R /Type /Catalog >>
  5 +endobj
  6 +2 0 obj
  7 +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
  8 +endobj
  9 +3 0 obj
  10 +<< /Contents 4 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 5 0 R >> /ProcSet 6 0 R >> /Type /Page >>
  11 +endobj
  12 +4 0 obj
  13 +<< /Length 44 >>
  14 +stream
  15 +BT
  16 + /F1 24 Tf
  17 + 72 720 Td
  18 + (Page 1) Tj
  19 +ET
  20 +endstream
  21 +endobj
  22 +5 0 obj
  23 +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
  24 +endobj
  25 +6 0 obj
  26 +[ /PDF /Text ]
  27 +endobj
  28 +xref
  29 +0 7
  30 +0000000000 65535 f
  31 +0000000015 00000 n
  32 +0000000064 00000 n
  33 +0000000123 00000 n
  34 +0000000266 00000 n
  35 +0000000359 00000 n
  36 +0000000466 00000 n
  37 +trailer << /Root 1 0 R /Size 7 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
  38 +startxref
  39 +496
  40 +%%EOF
... ...
examples/qtest/pdf-split-pages/exp2.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +1 0 obj
  4 +<< /Pages 2 0 R /Type /Catalog >>
  5 +endobj
  6 +2 0 obj
  7 +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
  8 +endobj
  9 +3 0 obj
  10 +<< /Contents 4 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 5 0 R >> /ProcSet 6 0 R >> /Type /Page >>
  11 +endobj
  12 +4 0 obj
  13 +<< /Length 44 >>
  14 +stream
  15 +BT
  16 + /F1 24 Tf
  17 + 72 720 Td
  18 + (Page 2) Tj
  19 +ET
  20 +endstream
  21 +endobj
  22 +5 0 obj
  23 +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
  24 +endobj
  25 +6 0 obj
  26 +[ /PDF /Text ]
  27 +endobj
  28 +xref
  29 +0 7
  30 +0000000000 65535 f
  31 +0000000015 00000 n
  32 +0000000064 00000 n
  33 +0000000123 00000 n
  34 +0000000266 00000 n
  35 +0000000359 00000 n
  36 +0000000466 00000 n
  37 +trailer << /Root 1 0 R /Size 7 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
  38 +startxref
  39 +496
  40 +%%EOF
... ...
examples/qtest/pdf-split-pages/in.pdf 0 → 100644
No preview for this file type