Commit 599daddb47fc0340e48b02f7ba00ef86bfef8c45
1 parent
9210dd46
decode streams on check, always exit abnormally when warnings are detected
git-svn-id: svn+q:///qpdf/trunk@660 71b93d88-0707-0410-a8cf-f5a4172ac649
Showing
7 changed files
with
118 additions
and
32 deletions
ChangeLog
| 1 | +2009-03-08 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * qpdf/fix-qdf (write_ostream): Adjust offsets while writing | |
| 4 | + object streams to account for changes in the length of the | |
| 5 | + dictionary and offset tables. | |
| 6 | + | |
| 7 | + * qpdf/qpdf.cc (main): In check mode, in addition to checking | |
| 8 | + structure of file, attempt to decode all stream data. | |
| 9 | + | |
| 10 | + * libqpdf/QPDFWriter.cc (QPDFWriter::writeObject): In QDF mode, | |
| 11 | + write a comment to the QDF file that indicates the object ID from | |
| 12 | + the original file. | |
| 13 | + | |
| 14 | + * libqpdf/QPDF.cc (QPDF::pipeStreamData): Issue a warning instead | |
| 15 | + of failing if there is a problem found while decoding stream. | |
| 16 | + | |
| 17 | + * qpdf/qpdf.cc: Exit with a status of 3 if warnings were found | |
| 18 | + regardless of what mode we're in. | |
| 19 | + | |
| 1 | 20 | 2009-02-21 Jay Berkenbilt <ejb@ql.org> |
| 2 | 21 | |
| 3 | 22 | * 2.0.4: release | ... | ... |
include/qpdf/QPDF.hh
| ... | ... | @@ -160,6 +160,10 @@ class QPDF |
| 160 | 160 | // dictionaries) with direct objects. |
| 161 | 161 | void flattenScalarReferences(); |
| 162 | 162 | |
| 163 | + // Decode all streams, discarding the output. Used to check | |
| 164 | + // correctness of stream encoding. | |
| 165 | + void decodeStreams(); | |
| 166 | + | |
| 163 | 167 | // For QPDFWriter: |
| 164 | 168 | |
| 165 | 169 | // Remove /ID, /Encrypt, and /Prev keys from the trailer | ... | ... |
libqpdf/QPDF.cc
| ... | ... | @@ -10,6 +10,7 @@ |
| 10 | 10 | #include <qpdf/QUtil.hh> |
| 11 | 11 | #include <qpdf/PCRE.hh> |
| 12 | 12 | #include <qpdf/Pipeline.hh> |
| 13 | +#include <qpdf/Pl_Discard.hh> | |
| 13 | 14 | |
| 14 | 15 | #include <qpdf/QPDFExc.hh> |
| 15 | 16 | #include <qpdf/QPDF_Null.hh> |
| ... | ... | @@ -1810,23 +1811,51 @@ QPDF::pipeStreamData(int objid, int generation, |
| 1810 | 1811 | } |
| 1811 | 1812 | } |
| 1812 | 1813 | |
| 1813 | - this->file.seek(offset, SEEK_SET); | |
| 1814 | - char buf[10240]; | |
| 1815 | - while (length > 0) | |
| 1814 | + try | |
| 1816 | 1815 | { |
| 1817 | - size_t to_read = (sizeof(buf) < length ? sizeof(buf) : length); | |
| 1818 | - size_t len = this->file.read(buf, to_read); | |
| 1819 | - if (len == 0) | |
| 1816 | + this->file.seek(offset, SEEK_SET); | |
| 1817 | + char buf[10240]; | |
| 1818 | + while (length > 0) | |
| 1820 | 1819 | { |
| 1821 | - throw QPDFExc(this->file.getName(), this->file.getLastOffset(), | |
| 1822 | - "unexpected EOF reading stream data"); | |
| 1820 | + size_t to_read = (sizeof(buf) < length ? sizeof(buf) : length); | |
| 1821 | + size_t len = this->file.read(buf, to_read); | |
| 1822 | + if (len == 0) | |
| 1823 | + { | |
| 1824 | + throw QPDFExc(this->file.getName(), this->file.getLastOffset(), | |
| 1825 | + "unexpected EOF reading stream data"); | |
| 1826 | + } | |
| 1827 | + length -= len; | |
| 1828 | + pipeline->write((unsigned char*)buf, len); | |
| 1823 | 1829 | } |
| 1824 | - length -= len; | |
| 1825 | - pipeline->write((unsigned char*)buf, len); | |
| 1830 | + } | |
| 1831 | + catch (QEXC::General& e) | |
| 1832 | + { | |
| 1833 | + QTC::TC("qpdf", "QPDF decoding error warning"); | |
| 1834 | + warn(QPDFExc(this->file.getName(), this->file.getLastOffset(), | |
| 1835 | + "error decoding stream data for object " + | |
| 1836 | + QUtil::int_to_string(objid) + " " + | |
| 1837 | + QUtil::int_to_string(generation) + ": " + e.unparse())); | |
| 1826 | 1838 | } |
| 1827 | 1839 | pipeline->finish(); |
| 1828 | 1840 | } |
| 1829 | 1841 | |
| 1842 | +void | |
| 1843 | +QPDF::decodeStreams() | |
| 1844 | +{ | |
| 1845 | + for (std::map<ObjGen, QPDFXRefEntry>::iterator iter = | |
| 1846 | + this->xref_table.begin(); | |
| 1847 | + iter != this->xref_table.end(); ++iter) | |
| 1848 | + { | |
| 1849 | + ObjGen const& og = (*iter).first; | |
| 1850 | + QPDFObjectHandle obj = getObjectByID(og.obj, og.gen); | |
| 1851 | + if (obj.isStream()) | |
| 1852 | + { | |
| 1853 | + Pl_Discard pl; | |
| 1854 | + obj.pipeStreamData(&pl, true, false, false); | |
| 1855 | + } | |
| 1856 | + } | |
| 1857 | +} | |
| 1858 | + | |
| 1830 | 1859 | std::vector<QPDFObjectHandle> const& |
| 1831 | 1860 | QPDF::getAllPages() |
| 1832 | 1861 | { | ... | ... |
manual/qpdf-manual.xml
| ... | ... | @@ -738,15 +738,15 @@ make |
| 738 | 738 | <term><option>-check</option></term> |
| 739 | 739 | <listitem> |
| 740 | 740 | <para> |
| 741 | - Checks file structure and well as encryption and | |
| 742 | - linearization. A file for which <option>--check</option> | |
| 743 | - reports no errors may still have errors in stream data but | |
| 744 | - should otherwise be otherwise structurally sound. If | |
| 745 | - <option>--check</option> any errors, qpdf will exit with a | |
| 746 | - status of 2. There are some recoverable conditions that | |
| 747 | - <option>--check</option> detects. These are issued as | |
| 748 | - warnings instead of errors. If qpdf finds no errors but finds | |
| 749 | - warnings, it will exit with a status of 3 (as of | |
| 741 | + Checks file structure as well as encryption, linearization, | |
| 742 | + and encoding of stream data. A file for which | |
| 743 | + <option>--check</option> reports no errors may still have | |
| 744 | + errors in stream data content but should otherwise be | |
| 745 | + structurally sound. If <option>--check</option> finds any errors, | |
| 746 | + qpdf will exit with a status of 2. There are some recoverable | |
| 747 | + conditions that <option>--check</option> detects. These are | |
| 748 | + issued as warnings instead of errors. If qpdf finds no errors | |
| 749 | + but finds warnings, it will exit with a status of 3 (as of | |
| 750 | 750 | version 2.0.4). |
| 751 | 751 | </para> |
| 752 | 752 | </listitem> |
| ... | ... | @@ -861,6 +861,12 @@ make |
| 861 | 861 | special comments that make them easy to find. |
| 862 | 862 | </para> |
| 863 | 863 | </listitem> |
| 864 | + <listitem> | |
| 865 | + <para> | |
| 866 | + Comments precede each object indicating the object number of the | |
| 867 | + corresponding object in the original file. | |
| 868 | + </para> | |
| 869 | + </listitem> | |
| 864 | 870 | </itemizedlist> |
| 865 | 871 | </para> |
| 866 | 872 | <para> | ... | ... |
qpdf/qpdf.cc
| ... | ... | @@ -12,6 +12,9 @@ |
| 12 | 12 | |
| 13 | 13 | #include <qpdf/QPDFWriter.hh> |
| 14 | 14 | |
| 15 | +static int const EXIT_ERROR = 2; | |
| 16 | +static int const EXIT_WARNING = 3; | |
| 17 | + | |
| 15 | 18 | static char const* whoami = 0; |
| 16 | 19 | |
| 17 | 20 | // Note: let's not be too noisy about documenting the fact that this |
| ... | ... | @@ -158,7 +161,7 @@ void usage(std::string const& msg) |
| 158 | 161 | << "Usage: " << whoami << " [options] infile outfile" << std::endl |
| 159 | 162 | << "For detailed help, run " << whoami << " --help" << std::endl |
| 160 | 163 | << std::endl; |
| 161 | - exit(2); | |
| 164 | + exit(EXIT_ERROR); | |
| 162 | 165 | } |
| 163 | 166 | |
| 164 | 167 | static void show_encryption(QPDF& pdf) |
| ... | ... | @@ -752,7 +755,7 @@ int main(int argc, char* argv[]) |
| 752 | 755 | } |
| 753 | 756 | else |
| 754 | 757 | { |
| 755 | - exit(2); | |
| 758 | + exit(EXIT_ERROR); | |
| 756 | 759 | } |
| 757 | 760 | } |
| 758 | 761 | if (show_linearization) |
| ... | ... | @@ -777,7 +780,7 @@ int main(int argc, char* argv[]) |
| 777 | 780 | QTC::TC("qpdf", "unable to filter"); |
| 778 | 781 | std::cerr << "Unable to filter stream data." |
| 779 | 782 | << std::endl; |
| 780 | - exit(2); | |
| 783 | + exit(EXIT_ERROR); | |
| 781 | 784 | } |
| 782 | 785 | else |
| 783 | 786 | { |
| ... | ... | @@ -869,6 +872,8 @@ int main(int argc, char* argv[]) |
| 869 | 872 | // traversal of file, so any structural errors |
| 870 | 873 | // would be exposed. |
| 871 | 874 | pdf.flattenScalarReferences(); |
| 875 | + // Also explicitly decode all streams. | |
| 876 | + pdf.decodeStreams(); | |
| 872 | 877 | okay = true; |
| 873 | 878 | } |
| 874 | 879 | } |
| ... | ... | @@ -880,8 +885,7 @@ int main(int argc, char* argv[]) |
| 880 | 885 | { |
| 881 | 886 | if (! pdf.getWarnings().empty()) |
| 882 | 887 | { |
| 883 | - // special exit status for warnings without errors | |
| 884 | - exit(3); | |
| 888 | + exit(EXIT_WARNING); | |
| 885 | 889 | } |
| 886 | 890 | else |
| 887 | 891 | { |
| ... | ... | @@ -946,11 +950,15 @@ int main(int argc, char* argv[]) |
| 946 | 950 | } |
| 947 | 951 | w.write(); |
| 948 | 952 | } |
| 953 | + if (! pdf.getWarnings().empty()) | |
| 954 | + { | |
| 955 | + exit(EXIT_WARNING); | |
| 956 | + } | |
| 949 | 957 | } |
| 950 | 958 | catch (std::exception& e) |
| 951 | 959 | { |
| 952 | 960 | std::cerr << e.what() << std::endl; |
| 953 | - exit(2); | |
| 961 | + exit(EXIT_ERROR); | |
| 954 | 962 | } |
| 955 | 963 | |
| 956 | 964 | return 0; | ... | ... |
qpdf/qpdf.testcov
qpdf/qtest/qpdf.test
| ... | ... | @@ -183,7 +183,7 @@ for (my $i = 1; $i <= scalar(@badfiles); ++$i) |
| 183 | 183 | $td->runtest("recover heifer file", |
| 184 | 184 | {$td->COMMAND => "qpdf --static-id -qdf heifer.pdf a.pdf"}, |
| 185 | 185 | {$td->FILE => "heifer.out", |
| 186 | - $td->EXIT_STATUS => 0}, | |
| 186 | + $td->EXIT_STATUS => 3}, | |
| 187 | 187 | $td->NORMALIZE_NEWLINES); |
| 188 | 188 | $td->runtest("check output", |
| 189 | 189 | {$td->FILE => "a.pdf"}, |
| ... | ... | @@ -206,7 +206,7 @@ $td->runtest("damaged replaced page contents", |
| 206 | 206 | {$td->COMMAND => "qpdf --static-id -qdf" . |
| 207 | 207 | " append-page-content-damaged.pdf a.pdf"}, |
| 208 | 208 | {$td->FILE => "append-page-content-damaged.out", |
| 209 | - $td->EXIT_STATUS => 0}, | |
| 209 | + $td->EXIT_STATUS => 3}, | |
| 210 | 210 | $td->NORMALIZE_NEWLINES); |
| 211 | 211 | $td->runtest("check output", |
| 212 | 212 | {$td->FILE => "a.pdf"}, |
| ... | ... | @@ -282,7 +282,7 @@ check_pdf("no recompression", |
| 282 | 282 | show_ntests(); |
| 283 | 283 | # ---------- |
| 284 | 284 | $td->notify("--- Object Stream Tests ---"); |
| 285 | -$n_tests += 36 * 4; | |
| 285 | +$n_tests += 36 * 6; | |
| 286 | 286 | $n_compare_pdfs += 36; |
| 287 | 287 | |
| 288 | 288 | for (my $n = 16; $n <= 19; ++$n) |
| ... | ... | @@ -294,7 +294,7 @@ for (my $n = 16; $n <= 19; ++$n) |
| 294 | 294 | { |
| 295 | 295 | foreach my $qdf ('-qdf', '', '-encrypt "" x 128 --') |
| 296 | 296 | { |
| 297 | - # 4 tests + 1 compare_pdfs | |
| 297 | + # 6 tests + 1 compare_pdfs | |
| 298 | 298 | $td->runtest("object stream mode", |
| 299 | 299 | {$td->COMMAND => |
| 300 | 300 | "qpdf --static-id $flags $qdf $in a.pdf"}, |
| ... | ... | @@ -316,6 +316,12 @@ for (my $n = 16; $n <= 19; ++$n) |
| 316 | 316 | $td->runtest("compare files", |
| 317 | 317 | {$td->FILE => "a.qdf"}, |
| 318 | 318 | {$td->FILE => "b.qdf"}); |
| 319 | + $td->runtest("fix-qdf identity check", | |
| 320 | + {$td->COMMAND => "fix-qdf a.qdf >| b.pdf"}, | |
| 321 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 322 | + $td->runtest("compare files", | |
| 323 | + {$td->FILE => "a.qdf"}, | |
| 324 | + {$td->FILE => "b.qdf"}); | |
| 319 | 325 | } |
| 320 | 326 | } |
| 321 | 327 | flush_tiff_cache(); |
| ... | ... | @@ -324,12 +330,14 @@ for (my $n = 16; $n <= 19; ++$n) |
| 324 | 330 | show_ntests(); |
| 325 | 331 | # ---------- |
| 326 | 332 | $td->notify("--- Specific File Tests ---"); |
| 327 | -$n_tests += 1; | |
| 333 | +$n_tests += 2; | |
| 334 | +$n_compare_pdfs += 1; | |
| 328 | 335 | |
| 329 | 336 | # Special PDF files that caused problems at some point |
| 330 | 337 | |
| 331 | 338 | # This file is a PDF 1.1 file with /# as a name and with |
| 332 | -# inconsistencies in its free table. | |
| 339 | +# inconsistencies in its free table. It also has LZW streams that | |
| 340 | +# happen to test boundary conditions in the LZW decoder. | |
| 333 | 341 | $td->runtest("old and complex", |
| 334 | 342 | {$td->COMMAND => "qpdf --check old-and-complex.pdf"}, |
| 335 | 343 | {$td->STRING => +("checking old-and-complex.pdf\n" . |
| ... | ... | @@ -339,6 +347,12 @@ $td->runtest("old and complex", |
| 339 | 347 | $td->EXIT_STATUS => 0}, |
| 340 | 348 | $td->NORMALIZE_NEWLINES); |
| 341 | 349 | |
| 350 | +$td->runtest("convert to qdf", | |
| 351 | + {$td->COMMAND => "qpdf --qdf old-and-complex.pdf a.qdf"}, | |
| 352 | + {$td->STRING => "", $td->EXIT_STATUS => 0}); | |
| 353 | + | |
| 354 | +compare_pdfs("old-and-complex.pdf", "a.qdf"); | |
| 355 | + | |
| 342 | 356 | show_ntests(); |
| 343 | 357 | # ---------- |
| 344 | 358 | $td->notify("--- Mutability Tests ---"); |
| ... | ... | @@ -823,7 +837,7 @@ foreach my $file (@files) |
| 823 | 837 | show_ntests(); |
| 824 | 838 | # ---------- |
| 825 | 839 | $td->notify("--- fix-qdf Tests ---"); |
| 826 | -$n_tests += 2; | |
| 840 | +$n_tests += 4; | |
| 827 | 841 | |
| 828 | 842 | for (my $n = 1; $n <= 2; ++$n) |
| 829 | 843 | { |
| ... | ... | @@ -831,6 +845,11 @@ for (my $n = 1; $n <= 2; ++$n) |
| 831 | 845 | {$td->COMMAND => "fix-qdf fix$n.qdf"}, |
| 832 | 846 | {$td->FILE => "fix$n.qdf.out", |
| 833 | 847 | $td->EXIT_STATUS => 0}); |
| 848 | + | |
| 849 | + $td->runtest("identity fix-qdf $n", | |
| 850 | + {$td->COMMAND => "fix-qdf fix$n.qdf.out"}, | |
| 851 | + {$td->FILE => "fix$n.qdf.out", | |
| 852 | + $td->EXIT_STATUS => 0}); | |
| 834 | 853 | } |
| 835 | 854 | |
| 836 | 855 | show_ntests(); | ... | ... |