Commit 49621ef5a825fc0a600284fa2c33775b330e2007

Authored by Jay Berkenbilt
1 parent db5d03d8

Add qpdf-test-compare for comparing PDFs with different zlib

CMakeLists.txt
... ... @@ -341,6 +341,7 @@ add_test(
341 341 # add_subdirectory order affects test order
342 342 add_subdirectory(include)
343 343 add_subdirectory(libqpdf)
  344 +add_subdirectory(compare-for-test)
344 345 add_subdirectory(qpdf)
345 346 add_subdirectory(libtests)
346 347 add_subdirectory(examples)
... ...
compare-for-test/CMakeLists.txt 0 → 100644
# The directory is named compare-for-test, not qpdf-test-compare like
# the executable it builds, to make shell completion easier.
add_executable(qpdf-test-compare qpdf-test-compare.cc)
target_link_libraries(qpdf-test-compare libqpdf)

# Register the qtest suite that lives in this directory.
add_test(
  NAME compare-for-test
  COMMAND
    ${RUN_QTEST}
    --top ${qpdf_SOURCE_DIR}
    --bin $<TARGET_FILE_DIR:qpdf-test-compare>
    --bin $<TARGET_FILE_DIR:libqpdf> # for Windows to find DLL
    --code ${qpdf_SOURCE_DIR}/compare-for-test
    --color ${QTEST_COLOR}
    --show-on-failure ${SHOW_FAILED_TEST_OUTPUT}
    --tc "${qpdf_SOURCE_DIR}/compare-for-test/*.cc")
... ...
compare-for-test/compare.testcov 0 → 100644
  1 +objects with different type 0
  2 +different stream dictionaries 0
  3 +uncompressing 0
  4 +not uncompressing 0
  5 +differing data size 1
  6 +different data 1
  7 +different non-stream 0
  8 +different trailer 0
  9 +ignore data for xref stream 0
... ...
compare-for-test/qpdf-test-compare.cc 0 → 100644
  1 +#include <qpdf/Pl_StdioFile.hh>
  2 +#include <qpdf/QPDF.hh>
  3 +#include <qpdf/QTC.hh>
  4 +#include <qpdf/QUtil.hh>
  5 +
  6 +#include <cstdio>
  7 +#include <cstdlib>
  8 +#include <cstring>
  9 +#include <iostream>
  10 +
  11 +static char const* whoami = nullptr;
  12 +
  13 +void
  14 +usage()
  15 +{
  16 + std::cerr << "Usage: " << whoami << " actual expected" << std::endl
  17 + << R"(Where "actual" is the actual output and "expected" is the expected)"
  18 + << std::endl
  19 + << "output of a test, compare the two PDF files. The files are considered"
  20 + << std::endl
  21 + << "to match if all their objects are identical except that, if a stream is"
  22 + << std::endl
  23 + << "compressed with FlateDecode, the uncompressed data must match." << std::endl
  24 + << std::endl
  25 + << "If the files match, the output is the expected file. Otherwise, it is"
  26 + << std::endl
  27 + << "the actual file. Read comments in the test suite for rationale." << std::endl;
  28 + exit(2);
  29 +}
  30 +
  31 +void
  32 +cleanEncryption(QPDF& q)
  33 +{
  34 + auto enc = q.getTrailer().getKey("/Encrypt");
  35 + if (!enc.isDictionary()) {
  36 + return;
  37 + }
  38 + enc.removeKey("/O");
  39 + enc.removeKey("/OE");
  40 + enc.removeKey("/U");
  41 + enc.removeKey("/UE");
  42 + enc.removeKey("/Perms");
  43 +}
  44 +
  45 +std::string
  46 +compareObjects(std::string const& label, QPDFObjectHandle act, QPDFObjectHandle exp)
  47 +{
  48 + if (act.getTypeCode() != exp.getTypeCode()) {
  49 + QTC::TC("compare", "objects with different type");
  50 + return label + ": different types";
  51 + }
  52 + if (act.isStream()) {
  53 + auto act_dict = act.getDict();
  54 + auto exp_dict = exp.getDict();
  55 + act_dict.removeKey("/Length");
  56 + exp_dict.removeKey("/Length");
  57 + if (act_dict.unparse() != exp_dict.unparse()) {
  58 + QTC::TC("compare", "different stream dictionaries");
  59 + return label + ": stream dictionaries differ";
  60 + }
  61 + if (act_dict.getKey("/Type").isNameAndEquals("/XRef")) {
  62 + QTC::TC("compare", "ignore data for xref stream");
  63 + return "";
  64 + }
  65 + auto act_filters = act_dict.getKey("/Filter");
  66 + bool uncompress = false;
  67 + if (act_filters.isName()) {
  68 + act_filters = act_filters.wrapInArray();
  69 + }
  70 + if (act_filters.isArray()) {
  71 + for (auto& filter: act_filters.aitems()) {
  72 + if (filter.isNameAndEquals("/FlateDecode")) {
  73 + uncompress = true;
  74 + break;
  75 + }
  76 + }
  77 + }
  78 + std::shared_ptr<Buffer> act_data;
  79 + std::shared_ptr<Buffer> exp_data;
  80 + if (uncompress) {
  81 + QTC::TC("compare", "uncompressing");
  82 + act_data = act.getStreamData();
  83 + exp_data = exp.getStreamData();
  84 + } else {
  85 + QTC::TC("compare", "not uncompressing");
  86 + act_data = act.getRawStreamData();
  87 + exp_data = exp.getRawStreamData();
  88 + }
  89 + if (act_data->getSize() != exp_data->getSize()) {
  90 + QTC::TC("compare", "differing data size", uncompress ? 0 : 1);
  91 + return label + ": stream data size differs";
  92 + }
  93 + auto act_buf = act_data->getBuffer();
  94 + auto exp_buf = exp_data->getBuffer();
  95 + if (memcmp(act_buf, exp_buf, act_data->getSize()) != 0) {
  96 + QTC::TC("compare", "different data", uncompress ? 0 : 1);
  97 + return label + ": stream data differs";
  98 + }
  99 + } else if (act.unparseResolved() != exp.unparseResolved()) {
  100 + QTC::TC("compare", "different non-stream");
  101 + return label + ": object contents differ";
  102 + }
  103 + return "";
  104 +}
  105 +
  106 +std::string
  107 +compare(char const* actual_filename, char const* expected_filename)
  108 +{
  109 + QPDF actual;
  110 + actual.processFile(actual_filename);
  111 + QPDF expected;
  112 + expected.processFile(expected_filename);
  113 + // The motivation behind this program is to compare files in a way that allows for
  114 + // differences in the exact bytes of zlib compression. If all zlib implementations produced
  115 + // exactly the same output, we would just be able to use straight comparison, but since they
  116 + // don't, we use this. As such, we are enforcing a standard of "sameness" that goes beyond
  117 + // showing semantic equivalence. The only difference we are allowing is compressed data.
  118 +
  119 + auto act_trailer = actual.getTrailer();
  120 + auto exp_trailer = expected.getTrailer();
  121 + act_trailer.removeKey("/Length");
  122 + exp_trailer.removeKey("/Length");
  123 + auto trailer_diff = compareObjects("trailer", act_trailer, exp_trailer);
  124 + if (!trailer_diff.empty()) {
  125 + QTC::TC("compare", "different trailer");
  126 + return trailer_diff;
  127 + }
  128 +
  129 + cleanEncryption(actual);
  130 + cleanEncryption(expected);
  131 +
  132 + auto actual_objects = actual.getAllObjects();
  133 + auto expected_objects = expected.getAllObjects();
  134 + if (actual_objects.size() != expected_objects.size()) {
  135 + // Not exercised in the test suite since the trailers will differ in this case.
  136 + return "different number of objects";
  137 + }
  138 + for (size_t i = 0; i < actual_objects.size(); ++i) {
  139 + auto act = actual_objects[i];
  140 + auto exp = expected_objects[i];
  141 + auto act_og = act.getObjGen();
  142 + auto exp_og = exp.getObjGen();
  143 + if (act_og != exp_og) {
  144 + // not reproduced in the test suite
  145 + return "different object IDs";
  146 + }
  147 + auto ret = compareObjects(act_og.unparse(), act, exp);
  148 + if (!ret.empty()) {
  149 + return ret;
  150 + }
  151 + }
  152 + return "";
  153 +}
  154 +
  155 +int
  156 +main(int argc, char* argv[])
  157 +{
  158 + if ((whoami = strrchr(argv[0], '/')) == nullptr) {
  159 + whoami = argv[0];
  160 + } else {
  161 + ++whoami;
  162 + }
  163 +
  164 + if ((argc == 2) && (strcmp(argv[1], "--version") == 0)) {
  165 + std::cout << whoami << " from qpdf version " << QPDF::QPDFVersion() << std::endl;
  166 + exit(0);
  167 + }
  168 +
  169 + if (argc != 3) {
  170 + usage();
  171 + }
  172 +
  173 + bool show_why = QUtil::get_env("QPDF_COMPARE_WHY");
  174 + try {
  175 + char const* to_output;
  176 + auto actual = argv[1];
  177 + auto expected = argv[2];
  178 + auto difference = compare(actual, expected);
  179 + if (difference.empty()) {
  180 + // The files are identical; write the expected file. This way, tests can be written
  181 + // that compare the output of this program to the expected file.
  182 + to_output = expected;
  183 + } else {
  184 + if (show_why) {
  185 + std::cerr << difference << std::endl;
  186 + exit(2);
  187 + }
  188 + // The files differ; write the actual file. If it is determined that the actual file
  189 + // is correct because of changes that result in intended differences, this enables
  190 + // the output of this program to replace the expected file in the test suite.
  191 + to_output = actual;
  192 + }
  193 + auto f = QUtil::safe_fopen(to_output, "rb");
  194 + QUtil::FileCloser fc(f);
  195 + QUtil::binary_stdout();
  196 + auto out = std::make_unique<Pl_StdioFile>("stdout", stdout);
  197 + unsigned char buf[2048];
  198 + bool done = false;
  199 + while (!done) {
  200 + size_t len = fread(buf, 1, sizeof(buf), f);
  201 + if (len <= 0) {
  202 + done = true;
  203 + } else {
  204 + out->write(buf, len);
  205 + }
  206 + }
  207 + if (!difference.empty()) {
  208 + exit(2);
  209 + }
  210 + } catch (std::exception& e) {
  211 + std::cerr << whoami << ": " << e.what() << std::endl;
  212 + exit(2);
  213 + }
  214 + return 0;
  215 +}
... ...
compare-for-test/qtest/compare.test 0 → 100644
#!/usr/bin/env perl
require 5.008;
BEGIN { $^W = 1; }
use strict;

chdir("compare") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('compare');

# Intended usage of the comparison tool: a test runs
# `compare actual expected` and checks the command's output against
# the expected file, so an existing file comparison can be converted
# by substituting the comparison command for the actual file. When the
# two files match, the tool emits the expected file, so an actual file
# that differs from the expected file only in its zlib compression
# passes. When they differ, the tool emits the actual file, so the
# output shown for the failure is the real actual output. If that
# output is judged correct and is used to replace the expected file,
# the test passes from then on no matter which zlib implementation is
# in use.

# The same file, compressed with different compression levels and/or
# different zlib implementations.
my @same = qw(zlib.pdf zlib-9.pdf zlib-ng.pdf);
my $n_same = scalar(@same);
my $comparisons = ($n_same * ($n_same + 1)) / 2;
my $n_tests = 2 * $comparisons;

foreach my $i (0 .. $#same)
{
    foreach my $j ($i .. $#same)
    {
        my ($first, $second) = ($same[$i], $same[$j]);
        # The files must differ byte for byte, except when a file is
        # being compared with itself.
        $td->runtest("byte-wise compare $i and $j",
                     {$td->COMMAND => "cmp $first $second"},
                     {$td->REGEXP => ".*",
                      $td->EXIT_STATUS => $i == $j ? 0 : "!0"});
        # The tool must report a match. This is the way compare is
        # meant to be used: the expected output is the same file as
        # the command's second argument.
        $td->runtest("compare $i and $j",
                     {$td->COMMAND => "qpdf-test-compare $first $second"},
                     {$td->FILE => $second, $td->EXIT_STATUS => 0});
    }
}

# Each entry: a file that differs from zlib.pdf, and the reason the
# tool is expected to report.
my @diff = (
    ["diff-num-objects.pdf", "trailer: object contents differ"],
    ["diff-non-stream.pdf", "3,0: object contents differ"],
    ["diff-data-size.pdf", "4,0: stream data size differs"],
    ["diff-data.pdf", "4,0: stream data differs"],
    ["diff-data-size-unc.pdf", "5,0: stream data size differs"],
    ["diff-data-unc.pdf", "5,0: stream data differs"],
    ["diff-stream-dict.pdf", "4,0: stream dictionaries differ"],
    ["diff-object-type.pdf", "6,0: different types"],
    );
$n_tests += 2 * scalar(@diff);

foreach my $entry (@diff)
{
    my ($file, $why) = @$entry;
    # A real test would use the expected file as the expected output,
    # as above. These tests exercise the comparison tool itself: on a
    # mismatch it must exit with a non-zero status and emit the actual
    # file. Don't copy this test.
    $td->runtest("$file is different",
                 {$td->COMMAND => "qpdf-test-compare $file zlib.pdf"},
                 {$td->FILE => $file, $td->EXIT_STATUS => 2});
    $td->runtest("$file is different (why)",
                 {$td->COMMAND =>
                      "env QPDF_COMPARE_WHY=1 qpdf-test-compare $file zlib.pdf"},
                 {$td->STRING => "$why\n", $td->EXIT_STATUS => 2},
                 $td->NORMALIZE_NEWLINES);
}

# The same checks for encrypted files.
$n_tests += 3;
$td->runtest("byte-wise compare encrypted files",
             {$td->COMMAND => "cmp enc1.pdf enc2.pdf"},
             {$td->REGEXP => ".*", $td->EXIT_STATUS => "!0"});
$td->runtest("compare encrypted files (same)",
             {$td->COMMAND =>
                  "env QPDF_COMPARE_WHY=1 qpdf-test-compare enc1.pdf enc2.pdf"},
             {$td->FILE => "enc2.pdf", $td->EXIT_STATUS => 0});
$td->runtest("compare encrypted files (different)",
             {$td->COMMAND =>
                  "env QPDF_COMPARE_WHY=1 qpdf-test-compare enc1.pdf diff-data-enc.pdf"},
             {$td->STRING => "4,0: stream data differs\n",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);

# Files that use object streams
$n_tests += 1;
$td->runtest("compare object stream files (same)",
             {$td->COMMAND =>
                  "env QPDF_COMPARE_WHY=1 qpdf-test-compare ostream1.pdf ostream2.pdf"},
             {$td->FILE => "ostream2.pdf", $td->EXIT_STATUS => 0});

$td->report($n_tests);
... ...
compare-for-test/qtest/compare/diff-data-enc.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/diff-data-size-unc.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/diff-data-size.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/diff-data-unc.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/diff-data.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/diff-non-stream.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/diff-num-objects.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/diff-object-type.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/diff-stream-dict.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/enc1.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/enc2.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/ostream1.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/ostream2.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/start.pdf 0 → 100644
  1 +%PDF-2.0
  2 +%¿÷¢þ
  3 +1 0 obj
  4 +<< /Pages 2 0 R /Type /Catalog >>
  5 +endobj
  6 +2 0 obj
  7 +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
  8 +endobj
  9 +3 0 obj
  10 +<< /Contents [ 4 0 R 5 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 6 0 R >> >> /Type /Page >>
  11 +endobj
  12 +4 0 obj
  13 +<< /Length 48 /Filter /FlateDecode >>
  14 +stream
  15 +BT
  16 + /F1 24 Tf
  17 + 72 720 Td
  18 + (WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW) Tj
  19 +ET
  20 +endstream
  21 +endobj
  22 +5 0 obj
  23 +<< /Length 43 >>
  24 +stream
  25 +BT
  26 + /F1 24 Tf
  27 + 72 681 Td
  28 + (Potato) Tj
  29 +ET
  30 +endstream
  31 +endobj
  32 +6 0 obj
  33 +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
  34 +endobj
  35 +xref
  36 +0 7
  37 +0000000000 65535 f
  38 +0000000015 00000 n
  39 +0000000064 00000 n
  40 +0000000123 00000 n
  41 +0000000261 00000 n
  42 +0000000379 00000 n
  43 +0000000471 00000 n
  44 +trailer << /Root 1 0 R /Size 7 /ID [<42841c13bbf709d79a200fa1691836f8><31415926535897932384626433832795>] >>
  45 +startxref
  46 +568
  47 +%%EOF
... ...
compare-for-test/qtest/compare/zlib-9.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/zlib-ng.pdf 0 → 100644
No preview for this file type
compare-for-test/qtest/compare/zlib.pdf 0 → 100644
No preview for this file type