Commit 599daddb47fc0340e48b02f7ba00ef86bfef8c45

Authored by Jay Berkenbilt
1 parent 9210dd46

decode streams on check, always exit abnormally when warnings are detected

git-svn-id: svn+q:///qpdf/trunk@660 71b93d88-0707-0410-a8cf-f5a4172ac649
ChangeLog
  1 +2009-03-08 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * qpdf/fix-qdf (write_ostream): Adjust offsets while writing
  4 + object streams to account for changes in the length of the
  5 + dictionary and offset tables.
  6 +
  7 + * qpdf/qpdf.cc (main): In check mode, in addition to checking
  8 + structure of file, attempt to decode all stream data.
  9 +
  10 + * libqpdf/QPDFWriter.cc (QPDFWriter::writeObject): In QDF mode,
  11 + write a comment to the QDF file that indicates the object ID from
  12 + the original file.
  13 +
  14 + * libqpdf/QPDF.cc (QPDF::pipeStreamData): Issue a warning instead
  15 + of failing if there is a problem found while decoding stream.
  16 +
  17 + * qpdf/qpdf.cc: Exit with a status of 3 if warnings were found
  18 + regardless of what mode we're in.
  19 +
1 2009-02-21 Jay Berkenbilt <ejb@ql.org> 20 2009-02-21 Jay Berkenbilt <ejb@ql.org>
2 21
3 * 2.0.4: release 22 * 2.0.4: release
include/qpdf/QPDF.hh
@@ -160,6 +160,10 @@ class QPDF @@ -160,6 +160,10 @@ class QPDF
160 // dictionaries) with direct objects. 160 // dictionaries) with direct objects.
161 void flattenScalarReferences(); 161 void flattenScalarReferences();
162 162
  163 + // Decode all streams, discarding the output. Used to check
  164 + // correctness of stream encoding.
  165 + void decodeStreams();
  166 +
163 // For QPDFWriter: 167 // For QPDFWriter:
164 168
165 // Remove /ID, /Encrypt, and /Prev keys from the trailer 169 // Remove /ID, /Encrypt, and /Prev keys from the trailer
libqpdf/QPDF.cc
@@ -10,6 +10,7 @@ @@ -10,6 +10,7 @@
10 #include <qpdf/QUtil.hh> 10 #include <qpdf/QUtil.hh>
11 #include <qpdf/PCRE.hh> 11 #include <qpdf/PCRE.hh>
12 #include <qpdf/Pipeline.hh> 12 #include <qpdf/Pipeline.hh>
  13 +#include <qpdf/Pl_Discard.hh>
13 14
14 #include <qpdf/QPDFExc.hh> 15 #include <qpdf/QPDFExc.hh>
15 #include <qpdf/QPDF_Null.hh> 16 #include <qpdf/QPDF_Null.hh>
@@ -1810,23 +1811,51 @@ QPDF::pipeStreamData(int objid, int generation, @@ -1810,23 +1811,51 @@ QPDF::pipeStreamData(int objid, int generation,
1810 } 1811 }
1811 } 1812 }
1812 1813
1813 - this->file.seek(offset, SEEK_SET);  
1814 - char buf[10240];  
1815 - while (length > 0) 1814 + try
1816 { 1815 {
1817 - size_t to_read = (sizeof(buf) < length ? sizeof(buf) : length);  
1818 - size_t len = this->file.read(buf, to_read);  
1819 - if (len == 0) 1816 + this->file.seek(offset, SEEK_SET);
  1817 + char buf[10240];
  1818 + while (length > 0)
1820 { 1819 {
1821 - throw QPDFExc(this->file.getName(), this->file.getLastOffset(),  
1822 - "unexpected EOF reading stream data"); 1820 + size_t to_read = (sizeof(buf) < length ? sizeof(buf) : length);
  1821 + size_t len = this->file.read(buf, to_read);
  1822 + if (len == 0)
  1823 + {
  1824 + throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
  1825 + "unexpected EOF reading stream data");
  1826 + }
  1827 + length -= len;
  1828 + pipeline->write((unsigned char*)buf, len);
1823 } 1829 }
1824 - length -= len;  
1825 - pipeline->write((unsigned char*)buf, len); 1830 + }
  1831 + catch (QEXC::General& e)
  1832 + {
  1833 + QTC::TC("qpdf", "QPDF decoding error warning");
  1834 + warn(QPDFExc(this->file.getName(), this->file.getLastOffset(),
  1835 + "error decoding stream data for object " +
  1836 + QUtil::int_to_string(objid) + " " +
  1837 + QUtil::int_to_string(generation) + ": " + e.unparse()));
1826 } 1838 }
1827 pipeline->finish(); 1839 pipeline->finish();
1828 } 1840 }
1829 1841
  1842 +void
  1843 +QPDF::decodeStreams()
  1844 +{
  1845 + for (std::map<ObjGen, QPDFXRefEntry>::iterator iter =
  1846 + this->xref_table.begin();
  1847 + iter != this->xref_table.end(); ++iter)
  1848 + {
  1849 + ObjGen const& og = (*iter).first;
  1850 + QPDFObjectHandle obj = getObjectByID(og.obj, og.gen);
  1851 + if (obj.isStream())
  1852 + {
  1853 + Pl_Discard pl;
  1854 + obj.pipeStreamData(&pl, true, false, false);
  1855 + }
  1856 + }
  1857 +}
  1858 +
1830 std::vector<QPDFObjectHandle> const& 1859 std::vector<QPDFObjectHandle> const&
1831 QPDF::getAllPages() 1860 QPDF::getAllPages()
1832 { 1861 {
manual/qpdf-manual.xml
@@ -738,15 +738,15 @@ make @@ -738,15 +738,15 @@ make
738 <term><option>-check</option></term> 738 <term><option>-check</option></term>
739 <listitem> 739 <listitem>
740 <para> 740 <para>
741 - Checks file structure and well as encryption and  
742 - linearization. A file for which <option>--check</option>  
743 - reports no errors may still have errors in stream data but  
744 - should otherwise be otherwise structurally sound. If  
745 - <option>--check</option> any errors, qpdf will exit with a  
746 - status of 2. There are some recoverable conditions that  
747 - <option>--check</option> detects. These are issued as  
748 - warnings instead of errors. If qpdf finds no errors but finds  
749 - warnings, it will exit with a status of 3 (as of 741 + Checks file structure and well as encryption, linearization,
  742 + and encoding of stream data. A file for which
  743 + <option>--check</option> reports no errors may still have
  744 + errors in stream data content but should otherwise be
  745 + structurally sound. If <option>--check</option> any errors,
  746 + qpdf will exit with a status of 2. There are some recoverable
  747 + conditions that <option>--check</option> detects. These are
  748 + issued as warnings instead of errors. If qpdf finds no errors
  749 + but finds warnings, it will exit with a status of 3 (as of
750 version&nbsp;2.0.4). 750 version&nbsp;2.0.4).
751 </para> 751 </para>
752 </listitem> 752 </listitem>
@@ -861,6 +861,12 @@ make @@ -861,6 +861,12 @@ make
861 special comments that make them easy to find. 861 special comments that make them easy to find.
862 </para> 862 </para>
863 </listitem> 863 </listitem>
  864 + <listitem>
  865 + <para>
  866 + Comments precede each object indicating the object number of the
  867 + corresponding object in the original file.
  868 + </para>
  869 + </listitem>
864 </itemizedlist> 870 </itemizedlist>
865 </para> 871 </para>
866 <para> 872 <para>
qpdf/qpdf.cc
@@ -12,6 +12,9 @@ @@ -12,6 +12,9 @@
12 12
13 #include <qpdf/QPDFWriter.hh> 13 #include <qpdf/QPDFWriter.hh>
14 14
  15 +static int const EXIT_ERROR = 2;
  16 +static int const EXIT_WARNING = 3;
  17 +
15 static char const* whoami = 0; 18 static char const* whoami = 0;
16 19
17 // Note: let's not be too noisy about documenting the fact that this 20 // Note: let's not be too noisy about documenting the fact that this
@@ -158,7 +161,7 @@ void usage(std::string const& msg) @@ -158,7 +161,7 @@ void usage(std::string const& msg)
158 << "Usage: " << whoami << " [options] infile outfile" << std::endl 161 << "Usage: " << whoami << " [options] infile outfile" << std::endl
159 << "For detailed help, run " << whoami << " --help" << std::endl 162 << "For detailed help, run " << whoami << " --help" << std::endl
160 << std::endl; 163 << std::endl;
161 - exit(2); 164 + exit(EXIT_ERROR);
162 } 165 }
163 166
164 static void show_encryption(QPDF& pdf) 167 static void show_encryption(QPDF& pdf)
@@ -752,7 +755,7 @@ int main(int argc, char* argv[]) @@ -752,7 +755,7 @@ int main(int argc, char* argv[])
752 } 755 }
753 else 756 else
754 { 757 {
755 - exit(2); 758 + exit(EXIT_ERROR);
756 } 759 }
757 } 760 }
758 if (show_linearization) 761 if (show_linearization)
@@ -777,7 +780,7 @@ int main(int argc, char* argv[]) @@ -777,7 +780,7 @@ int main(int argc, char* argv[])
777 QTC::TC("qpdf", "unable to filter"); 780 QTC::TC("qpdf", "unable to filter");
778 std::cerr << "Unable to filter stream data." 781 std::cerr << "Unable to filter stream data."
779 << std::endl; 782 << std::endl;
780 - exit(2); 783 + exit(EXIT_ERROR);
781 } 784 }
782 else 785 else
783 { 786 {
@@ -869,6 +872,8 @@ int main(int argc, char* argv[]) @@ -869,6 +872,8 @@ int main(int argc, char* argv[])
869 // traversal of file, so any structural errors 872 // traversal of file, so any structural errors
870 // would be exposed. 873 // would be exposed.
871 pdf.flattenScalarReferences(); 874 pdf.flattenScalarReferences();
  875 + // Also explicitly decode all streams.
  876 + pdf.decodeStreams();
872 okay = true; 877 okay = true;
873 } 878 }
874 } 879 }
@@ -880,8 +885,7 @@ int main(int argc, char* argv[]) @@ -880,8 +885,7 @@ int main(int argc, char* argv[])
880 { 885 {
881 if (! pdf.getWarnings().empty()) 886 if (! pdf.getWarnings().empty())
882 { 887 {
883 - // special exit status for warnings without errors  
884 - exit(3); 888 + exit(EXIT_WARNING);
885 } 889 }
886 else 890 else
887 { 891 {
@@ -946,11 +950,15 @@ int main(int argc, char* argv[]) @@ -946,11 +950,15 @@ int main(int argc, char* argv[])
946 } 950 }
947 w.write(); 951 w.write();
948 } 952 }
  953 + if (! pdf.getWarnings().empty())
  954 + {
  955 + exit(EXIT_WARNING);
  956 + }
949 } 957 }
950 catch (std::exception& e) 958 catch (std::exception& e)
951 { 959 {
952 std::cerr << e.what() << std::endl; 960 std::cerr << e.what() << std::endl;
953 - exit(2); 961 + exit(EXIT_ERROR);
954 } 962 }
955 963
956 return 0; 964 return 0;
qpdf/qpdf.testcov
@@ -117,3 +117,4 @@ QPDF piping xref stream from encrypted file 0 @@ -117,3 +117,4 @@ QPDF piping xref stream from encrypted file 0
117 unable to filter 0 117 unable to filter 0
118 QPDF_String non-trivial UTF-16 0 118 QPDF_String non-trivial UTF-16 0
119 QPDF xref overwrite object 0 119 QPDF xref overwrite object 0
  120 +QPDF decoding error warning 0
qpdf/qtest/qpdf.test
@@ -183,7 +183,7 @@ for (my $i = 1; $i <= scalar(@badfiles); ++$i) @@ -183,7 +183,7 @@ for (my $i = 1; $i <= scalar(@badfiles); ++$i)
183 $td->runtest("recover heifer file", 183 $td->runtest("recover heifer file",
184 {$td->COMMAND => "qpdf --static-id -qdf heifer.pdf a.pdf"}, 184 {$td->COMMAND => "qpdf --static-id -qdf heifer.pdf a.pdf"},
185 {$td->FILE => "heifer.out", 185 {$td->FILE => "heifer.out",
186 - $td->EXIT_STATUS => 0}, 186 + $td->EXIT_STATUS => 3},
187 $td->NORMALIZE_NEWLINES); 187 $td->NORMALIZE_NEWLINES);
188 $td->runtest("check output", 188 $td->runtest("check output",
189 {$td->FILE => "a.pdf"}, 189 {$td->FILE => "a.pdf"},
@@ -206,7 +206,7 @@ $td->runtest("damaged replaced page contents", @@ -206,7 +206,7 @@ $td->runtest("damaged replaced page contents",
206 {$td->COMMAND => "qpdf --static-id -qdf" . 206 {$td->COMMAND => "qpdf --static-id -qdf" .
207 " append-page-content-damaged.pdf a.pdf"}, 207 " append-page-content-damaged.pdf a.pdf"},
208 {$td->FILE => "append-page-content-damaged.out", 208 {$td->FILE => "append-page-content-damaged.out",
209 - $td->EXIT_STATUS => 0}, 209 + $td->EXIT_STATUS => 3},
210 $td->NORMALIZE_NEWLINES); 210 $td->NORMALIZE_NEWLINES);
211 $td->runtest("check output", 211 $td->runtest("check output",
212 {$td->FILE => "a.pdf"}, 212 {$td->FILE => "a.pdf"},
@@ -282,7 +282,7 @@ check_pdf("no recompression", @@ -282,7 +282,7 @@ check_pdf("no recompression",
282 show_ntests(); 282 show_ntests();
283 # ---------- 283 # ----------
284 $td->notify("--- Object Stream Tests ---"); 284 $td->notify("--- Object Stream Tests ---");
285 -$n_tests += 36 * 4; 285 +$n_tests += 36 * 6;
286 $n_compare_pdfs += 36; 286 $n_compare_pdfs += 36;
287 287
288 for (my $n = 16; $n <= 19; ++$n) 288 for (my $n = 16; $n <= 19; ++$n)
@@ -294,7 +294,7 @@ for (my $n = 16; $n <= 19; ++$n) @@ -294,7 +294,7 @@ for (my $n = 16; $n <= 19; ++$n)
294 { 294 {
295 foreach my $qdf ('-qdf', '', '-encrypt "" x 128 --') 295 foreach my $qdf ('-qdf', '', '-encrypt "" x 128 --')
296 { 296 {
297 - # 4 tests + 1 compare_pdfs 297 + # 6 tests + 1 compare_pdfs
298 $td->runtest("object stream mode", 298 $td->runtest("object stream mode",
299 {$td->COMMAND => 299 {$td->COMMAND =>
300 "qpdf --static-id $flags $qdf $in a.pdf"}, 300 "qpdf --static-id $flags $qdf $in a.pdf"},
@@ -316,6 +316,12 @@ for (my $n = 16; $n <= 19; ++$n) @@ -316,6 +316,12 @@ for (my $n = 16; $n <= 19; ++$n)
316 $td->runtest("compare files", 316 $td->runtest("compare files",
317 {$td->FILE => "a.qdf"}, 317 {$td->FILE => "a.qdf"},
318 {$td->FILE => "b.qdf"}); 318 {$td->FILE => "b.qdf"});
  319 + $td->runtest("fix-qdf identity check",
  320 + {$td->COMMAND => "fix-qdf a.qdf >| b.pdf"},
  321 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  322 + $td->runtest("compare files",
  323 + {$td->FILE => "a.qdf"},
  324 + {$td->FILE => "b.qdf"});
319 } 325 }
320 } 326 }
321 flush_tiff_cache(); 327 flush_tiff_cache();
@@ -324,12 +330,14 @@ for (my $n = 16; $n <= 19; ++$n) @@ -324,12 +330,14 @@ for (my $n = 16; $n <= 19; ++$n)
324 show_ntests(); 330 show_ntests();
325 # ---------- 331 # ----------
326 $td->notify("--- Specific File Tests ---"); 332 $td->notify("--- Specific File Tests ---");
327 -$n_tests += 1; 333 +$n_tests += 2;
  334 +$n_compare_pdfs += 1;
328 335
329 # Special PDF files that caused problems at some point 336 # Special PDF files that caused problems at some point
330 337
331 # This file is a PDF 1.1 file with /# as a name and with 338 # This file is a PDF 1.1 file with /# as a name and with
332 -# inconsistencies in its free table. 339 +# inconsistencies in its free table. It also has LZW streams that
  340 +# happen to test boundary conditions in the LZW decoder.
333 $td->runtest("old and complex", 341 $td->runtest("old and complex",
334 {$td->COMMAND => "qpdf --check old-and-complex.pdf"}, 342 {$td->COMMAND => "qpdf --check old-and-complex.pdf"},
335 {$td->STRING => +("checking old-and-complex.pdf\n" . 343 {$td->STRING => +("checking old-and-complex.pdf\n" .
@@ -339,6 +347,12 @@ $td->runtest("old and complex", @@ -339,6 +347,12 @@ $td->runtest("old and complex",
339 $td->EXIT_STATUS => 0}, 347 $td->EXIT_STATUS => 0},
340 $td->NORMALIZE_NEWLINES); 348 $td->NORMALIZE_NEWLINES);
341 349
  350 +$td->runtest("convert to qdf",
  351 + {$td->COMMAND => "qpdf --qdf old-and-complex.pdf a.qdf"},
  352 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  353 +
  354 +compare_pdfs("old-and-complex.pdf", "a.qdf");
  355 +
342 show_ntests(); 356 show_ntests();
343 # ---------- 357 # ----------
344 $td->notify("--- Mutability Tests ---"); 358 $td->notify("--- Mutability Tests ---");
@@ -823,7 +837,7 @@ foreach my $file (@files) @@ -823,7 +837,7 @@ foreach my $file (@files)
823 show_ntests(); 837 show_ntests();
824 # ---------- 838 # ----------
825 $td->notify("--- fix-qdf Tests ---"); 839 $td->notify("--- fix-qdf Tests ---");
826 -$n_tests += 2; 840 +$n_tests += 4;
827 841
828 for (my $n = 1; $n <= 2; ++$n) 842 for (my $n = 1; $n <= 2; ++$n)
829 { 843 {
@@ -831,6 +845,11 @@ for (my $n = 1; $n <= 2; ++$n) @@ -831,6 +845,11 @@ for (my $n = 1; $n <= 2; ++$n)
831 {$td->COMMAND => "fix-qdf fix$n.qdf"}, 845 {$td->COMMAND => "fix-qdf fix$n.qdf"},
832 {$td->FILE => "fix$n.qdf.out", 846 {$td->FILE => "fix$n.qdf.out",
833 $td->EXIT_STATUS => 0}); 847 $td->EXIT_STATUS => 0});
  848 +
  849 + $td->runtest("identity fix-qdf $n",
  850 + {$td->COMMAND => "fix-qdf fix$n.qdf.out"},
  851 + {$td->FILE => "fix$n.qdf.out",
  852 + $td->EXIT_STATUS => 0});
834 } 853 }
835 854
836 show_ntests(); 855 show_ntests();