Commit 599daddb47fc0340e48b02f7ba00ef86bfef8c45

Authored by Jay Berkenbilt
1 parent 9210dd46

decode streams on check, always exit abnormally when warnings are detected

git-svn-id: svn+q:///qpdf/trunk@660 71b93d88-0707-0410-a8cf-f5a4172ac649
ChangeLog
  1 +2009-03-08 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * qpdf/fix-qdf (write_ostream): Adjust offsets while writing
  4 + object streams to account for changes in the length of the
  5 + dictionary and offset tables.
  6 +
  7 + * qpdf/qpdf.cc (main): In check mode, in addition to checking
  8 + structure of file, attempt to decode all stream data.
  9 +
  10 + * libqpdf/QPDFWriter.cc (QPDFWriter::writeObject): In QDF mode,
  11 + write a comment to the QDF file that indicates the object ID from
  12 + the original file.
  13 +
  14 + * libqpdf/QPDF.cc (QPDF::pipeStreamData): Issue a warning instead
  15 + of failing if there is a problem found while decoding stream.
  16 +
  17 + * qpdf/qpdf.cc: Exit with a status of 3 if warnings were found
  18 + regardless of what mode we're in.
  19 +
1 20 2009-02-21 Jay Berkenbilt <ejb@ql.org>
2 21  
3 22 * 2.0.4: release
... ...
include/qpdf/QPDF.hh
... ... @@ -160,6 +160,10 @@ class QPDF
160 160 // dictionaries) with direct objects.
161 161 void flattenScalarReferences();
162 162  
  163 + // Decode all streams, discarding the output. Used to check
  164 + // correctness of stream encoding.
  165 + void decodeStreams();
  166 +
163 167 // For QPDFWriter:
164 168  
165 169 // Remove /ID, /Encrypt, and /Prev keys from the trailer
... ...
libqpdf/QPDF.cc
... ... @@ -10,6 +10,7 @@
10 10 #include <qpdf/QUtil.hh>
11 11 #include <qpdf/PCRE.hh>
12 12 #include <qpdf/Pipeline.hh>
  13 +#include <qpdf/Pl_Discard.hh>
13 14  
14 15 #include <qpdf/QPDFExc.hh>
15 16 #include <qpdf/QPDF_Null.hh>
... ... @@ -1810,23 +1811,51 @@ QPDF::pipeStreamData(int objid, int generation,
1810 1811 }
1811 1812 }
1812 1813  
1813   - this->file.seek(offset, SEEK_SET);
1814   - char buf[10240];
1815   - while (length > 0)
  1814 + try
1816 1815 {
1817   - size_t to_read = (sizeof(buf) < length ? sizeof(buf) : length);
1818   - size_t len = this->file.read(buf, to_read);
1819   - if (len == 0)
  1816 + this->file.seek(offset, SEEK_SET);
  1817 + char buf[10240];
  1818 + while (length > 0)
1820 1819 {
1821   - throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
1822   - "unexpected EOF reading stream data");
  1820 + size_t to_read = (sizeof(buf) < length ? sizeof(buf) : length);
  1821 + size_t len = this->file.read(buf, to_read);
  1822 + if (len == 0)
  1823 + {
  1824 + throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
  1825 + "unexpected EOF reading stream data");
  1826 + }
  1827 + length -= len;
  1828 + pipeline->write((unsigned char*)buf, len);
1823 1829 }
1824   - length -= len;
1825   - pipeline->write((unsigned char*)buf, len);
  1830 + }
  1831 + catch (QEXC::General& e)
  1832 + {
  1833 + QTC::TC("qpdf", "QPDF decoding error warning");
  1834 + warn(QPDFExc(this->file.getName(), this->file.getLastOffset(),
  1835 + "error decoding stream data for object " +
  1836 + QUtil::int_to_string(objid) + " " +
  1837 + QUtil::int_to_string(generation) + ": " + e.unparse()));
1826 1838 }
1827 1839 pipeline->finish();
1828 1840 }
1829 1841  
  1842 +void
  1843 +QPDF::decodeStreams()
  1844 +{
  1845 + for (std::map<ObjGen, QPDFXRefEntry>::iterator iter =
  1846 + this->xref_table.begin();
  1847 + iter != this->xref_table.end(); ++iter)
  1848 + {
  1849 + ObjGen const& og = (*iter).first;
  1850 + QPDFObjectHandle obj = getObjectByID(og.obj, og.gen);
  1851 + if (obj.isStream())
  1852 + {
  1853 + Pl_Discard pl;
  1854 + obj.pipeStreamData(&pl, true, false, false);
  1855 + }
  1856 + }
  1857 +}
  1858 +
1830 1859 std::vector<QPDFObjectHandle> const&
1831 1860 QPDF::getAllPages()
1832 1861 {
... ...
manual/qpdf-manual.xml
... ... @@ -738,15 +738,15 @@ make
738 738 <term><option>-check</option></term>
739 739 <listitem>
740 740 <para>
741   - Checks file structure and well as encryption and
742   - linearization. A file for which <option>--check</option>
743   - reports no errors may still have errors in stream data but
744   - should otherwise be otherwise structurally sound. If
745   - <option>--check</option> any errors, qpdf will exit with a
746   - status of 2. There are some recoverable conditions that
747   - <option>--check</option> detects. These are issued as
748   - warnings instead of errors. If qpdf finds no errors but finds
749   - warnings, it will exit with a status of 3 (as of
  741 + Checks file structure as well as encryption, linearization,
  742 + and encoding of stream data. A file for which
  743 + <option>--check</option> reports no errors may still have
  744 + errors in stream data content but should otherwise be
  745 + structurally sound. If <option>--check</option> reports any errors,
  746 + qpdf will exit with a status of 2. There are some recoverable
  747 + conditions that <option>--check</option> detects. These are
  748 + issued as warnings instead of errors. If qpdf finds no errors
  749 + but finds warnings, it will exit with a status of 3 (as of
750 750 version&nbsp;2.0.4).
751 751 </para>
752 752 </listitem>
... ... @@ -861,6 +861,12 @@ make
861 861 special comments that make them easy to find.
862 862 </para>
863 863 </listitem>
  864 + <listitem>
  865 + <para>
  866 + Comments precede each object indicating the object number of the
  867 + corresponding object in the original file.
  868 + </para>
  869 + </listitem>
864 870 </itemizedlist>
865 871 </para>
866 872 <para>
... ...
qpdf/qpdf.cc
... ... @@ -12,6 +12,9 @@
12 12  
13 13 #include <qpdf/QPDFWriter.hh>
14 14  
  15 +static int const EXIT_ERROR = 2;
  16 +static int const EXIT_WARNING = 3;
  17 +
15 18 static char const* whoami = 0;
16 19  
17 20 // Note: let's not be too noisy about documenting the fact that this
... ... @@ -158,7 +161,7 @@ void usage(std::string const& msg)
158 161 << "Usage: " << whoami << " [options] infile outfile" << std::endl
159 162 << "For detailed help, run " << whoami << " --help" << std::endl
160 163 << std::endl;
161   - exit(2);
  164 + exit(EXIT_ERROR);
162 165 }
163 166  
164 167 static void show_encryption(QPDF& pdf)
... ... @@ -752,7 +755,7 @@ int main(int argc, char* argv[])
752 755 }
753 756 else
754 757 {
755   - exit(2);
  758 + exit(EXIT_ERROR);
756 759 }
757 760 }
758 761 if (show_linearization)
... ... @@ -777,7 +780,7 @@ int main(int argc, char* argv[])
777 780 QTC::TC("qpdf", "unable to filter");
778 781 std::cerr << "Unable to filter stream data."
779 782 << std::endl;
780   - exit(2);
  783 + exit(EXIT_ERROR);
781 784 }
782 785 else
783 786 {
... ... @@ -869,6 +872,8 @@ int main(int argc, char* argv[])
869 872 // traversal of file, so any structural errors
870 873 // would be exposed.
871 874 pdf.flattenScalarReferences();
  875 + // Also explicitly decode all streams.
  876 + pdf.decodeStreams();
872 877 okay = true;
873 878 }
874 879 }
... ... @@ -880,8 +885,7 @@ int main(int argc, char* argv[])
880 885 {
881 886 if (! pdf.getWarnings().empty())
882 887 {
883   - // special exit status for warnings without errors
884   - exit(3);
  888 + exit(EXIT_WARNING);
885 889 }
886 890 else
887 891 {
... ... @@ -946,11 +950,15 @@ int main(int argc, char* argv[])
946 950 }
947 951 w.write();
948 952 }
  953 + if (! pdf.getWarnings().empty())
  954 + {
  955 + exit(EXIT_WARNING);
  956 + }
949 957 }
950 958 catch (std::exception& e)
951 959 {
952 960 std::cerr << e.what() << std::endl;
953   - exit(2);
  961 + exit(EXIT_ERROR);
954 962 }
955 963  
956 964 return 0;
... ...
qpdf/qpdf.testcov
... ... @@ -117,3 +117,4 @@ QPDF piping xref stream from encrypted file 0
117 117 unable to filter 0
118 118 QPDF_String non-trivial UTF-16 0
119 119 QPDF xref overwrite object 0
  120 +QPDF decoding error warning 0
... ...
qpdf/qtest/qpdf.test
... ... @@ -183,7 +183,7 @@ for (my $i = 1; $i <= scalar(@badfiles); ++$i)
183 183 $td->runtest("recover heifer file",
184 184 {$td->COMMAND => "qpdf --static-id -qdf heifer.pdf a.pdf"},
185 185 {$td->FILE => "heifer.out",
186   - $td->EXIT_STATUS => 0},
  186 + $td->EXIT_STATUS => 3},
187 187 $td->NORMALIZE_NEWLINES);
188 188 $td->runtest("check output",
189 189 {$td->FILE => "a.pdf"},
... ... @@ -206,7 +206,7 @@ $td->runtest("damaged replaced page contents",
206 206 {$td->COMMAND => "qpdf --static-id -qdf" .
207 207 " append-page-content-damaged.pdf a.pdf"},
208 208 {$td->FILE => "append-page-content-damaged.out",
209   - $td->EXIT_STATUS => 0},
  209 + $td->EXIT_STATUS => 3},
210 210 $td->NORMALIZE_NEWLINES);
211 211 $td->runtest("check output",
212 212 {$td->FILE => "a.pdf"},
... ... @@ -282,7 +282,7 @@ check_pdf("no recompression",
282 282 show_ntests();
283 283 # ----------
284 284 $td->notify("--- Object Stream Tests ---");
285   -$n_tests += 36 * 4;
  285 +$n_tests += 36 * 6;
286 286 $n_compare_pdfs += 36;
287 287  
288 288 for (my $n = 16; $n <= 19; ++$n)
... ... @@ -294,7 +294,7 @@ for (my $n = 16; $n <= 19; ++$n)
294 294 {
295 295 foreach my $qdf ('-qdf', '', '-encrypt "" x 128 --')
296 296 {
297   - # 4 tests + 1 compare_pdfs
  297 + # 6 tests + 1 compare_pdfs
298 298 $td->runtest("object stream mode",
299 299 {$td->COMMAND =>
300 300 "qpdf --static-id $flags $qdf $in a.pdf"},
... ... @@ -316,6 +316,12 @@ for (my $n = 16; $n <= 19; ++$n)
316 316 $td->runtest("compare files",
317 317 {$td->FILE => "a.qdf"},
318 318 {$td->FILE => "b.qdf"});
  319 + $td->runtest("fix-qdf identity check",
  320 + {$td->COMMAND => "fix-qdf a.qdf >| b.pdf"},
  321 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  322 + $td->runtest("compare files",
  323 + {$td->FILE => "a.qdf"},
  324 + {$td->FILE => "b.qdf"});
319 325 }
320 326 }
321 327 flush_tiff_cache();
... ... @@ -324,12 +330,14 @@ for (my $n = 16; $n <= 19; ++$n)
324 330 show_ntests();
325 331 # ----------
326 332 $td->notify("--- Specific File Tests ---");
327   -$n_tests += 1;
  333 +$n_tests += 2;
  334 +$n_compare_pdfs += 1;
328 335  
329 336 # Special PDF files that caused problems at some point
330 337  
331 338 # This file is a PDF 1.1 file with /# as a name and with
332   -# inconsistencies in its free table.
  339 +# inconsistencies in its free table. It also has LZW streams that
  340 +# happen to test boundary conditions in the LZW decoder.
333 341 $td->runtest("old and complex",
334 342 {$td->COMMAND => "qpdf --check old-and-complex.pdf"},
335 343 {$td->STRING => +("checking old-and-complex.pdf\n" .
... ... @@ -339,6 +347,12 @@ $td->runtest("old and complex",
339 347 $td->EXIT_STATUS => 0},
340 348 $td->NORMALIZE_NEWLINES);
341 349  
  350 +$td->runtest("convert to qdf",
  351 + {$td->COMMAND => "qpdf --qdf old-and-complex.pdf a.qdf"},
  352 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  353 +
  354 +compare_pdfs("old-and-complex.pdf", "a.qdf");
  355 +
342 356 show_ntests();
343 357 # ----------
344 358 $td->notify("--- Mutability Tests ---");
... ... @@ -823,7 +837,7 @@ foreach my $file (@files)
823 837 show_ntests();
824 838 # ----------
825 839 $td->notify("--- fix-qdf Tests ---");
826   -$n_tests += 2;
  840 +$n_tests += 4;
827 841  
828 842 for (my $n = 1; $n <= 2; ++$n)
829 843 {
... ... @@ -831,6 +845,11 @@ for (my $n = 1; $n <= 2; ++$n)
831 845 {$td->COMMAND => "fix-qdf fix$n.qdf"},
832 846 {$td->FILE => "fix$n.qdf.out",
833 847 $td->EXIT_STATUS => 0});
  848 +
  849 + $td->runtest("identity fix-qdf $n",
  850 + {$td->COMMAND => "fix-qdf fix$n.qdf.out"},
  851 + {$td->FILE => "fix$n.qdf.out",
  852 + $td->EXIT_STATUS => 0});
834 853 }
835 854  
836 855 show_ntests();
... ...