From c02cb9a7203d6b30557b317153aabd974af4d637 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 20 Aug 2024 12:44:19 +0100 Subject: [PATCH] Fix QPDF::recoverStreamLength --- .idea/codeStyles/Project.xml | 2 +- libqpdf/QPDF.cc | 25 +++++++++++++------------ qpdf/qtest/error-condition.test | 3 ++- qpdf/qtest/parsing.test | 2 +- qpdf/qtest/qpdf/bad-parse.qdf | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ qpdf/qtest/qpdf/bad39-recover.out | 13 +++++++++++++ qpdf/qtest/qpdf/bad39.out | 7 +++++++ qpdf/qtest/qpdf/bad39.pdf | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ qpdf/qtest/qpdf/bad39.qdf | 102 ------------------------------------------------------------------------------------------------------ qpdf/qtest/qpdf/parse-object.out | 6 +++--- 10 files changed, 223 insertions(+), 120 deletions(-) create mode 100644 qpdf/qtest/qpdf/bad-parse.qdf create mode 100644 qpdf/qtest/qpdf/bad39-recover.out create mode 100644 qpdf/qtest/qpdf/bad39.out create mode 100644 qpdf/qtest/qpdf/bad39.pdf delete mode 100644 qpdf/qtest/qpdf/bad39.qdf diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml index cc5fe12..4c77ebf 100644 --- a/.idea/codeStyles/Project.xml +++ b/.idea/codeStyles/Project.xml @@ -3,7 +3,7 @@ - + diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 258d4ee..69b0e62 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1649,27 +1649,28 @@ QPDF::recoverStreamLength( } if (length) { - qpdf_offset_t this_obj_offset = 0; - QPDFObjGen this_og; + auto end = stream_offset + toO(length); + qpdf_offset_t found_offset = 0; + QPDFObjGen found_og; // Make sure this is inside this object - for (auto const& iter: m->xref_table) { - QPDFXRefEntry const& entry = iter.second; + for (auto const& [current_og, entry]: m->xref_table) { if (entry.getType() == 1) { qpdf_offset_t obj_offset = entry.getOffset(); - if ((obj_offset > stream_offset) && - ((this_obj_offset == 0) || (this_obj_offset > obj_offset))) { - this_obj_offset = obj_offset; - this_og = iter.first; + if (found_offset < obj_offset && obj_offset < end) { + found_offset = obj_offset; + found_og = current_og; } } } - if (this_obj_offset && (this_og == og)) { - // Well, we found endstream\nendobj within the space allowed for this object, so we're - // probably in good shape. - throw std::logic_error("unreachable success"); + if (!found_offset || found_og == og) { + // If we are trying to recover an XRef stream the xref table will not contain and + // won't contain any entries, therefore we cannot check the found length. Otherwise we + // found endstream\nendobj within the space allowed for this object, so we're probably + // in good shape. } else { QTC::TC("qpdf", "QPDF found wrong endstream in recovery"); + length = 0; } } diff --git a/qpdf/qtest/error-condition.test b/qpdf/qtest/error-condition.test index 097190e..52d0eb7 100644 --- a/qpdf/qtest/error-condition.test +++ b/qpdf/qtest/error-condition.test @@ -55,6 +55,7 @@ my @badfiles = ("not a PDF file", # 1 "bad dictionary key", # 36 "space before xref", # 37 "startxref to space then eof", # 38 + "stream lenth revocery overlapping", # 39 ); $n_tests += @badfiles + 8; @@ -65,7 +66,7 @@ $n_tests += @badfiles + 8; # have error conditions that used to be fatal but are now considered # non-fatal. my %badtest_overrides = (); -for(6, 12..15, 17, 18..32, 34..37) +for(6, 12..15, 17, 18..32, 34..37, 39) { $badtest_overrides{$_} = 0; } diff --git a/qpdf/qtest/parsing.test b/qpdf/qtest/parsing.test index bd1a7c6..5202342 100644 --- a/qpdf/qtest/parsing.test +++ b/qpdf/qtest/parsing.test @@ -17,7 +17,7 @@ my $td = new TestDriver('parsing'); my $n_tests = 17; $td->runtest("parse objects from string", - {$td->COMMAND => "test_driver 31 bad39.qdf"}, + {$td->COMMAND => "test_driver 31 bad-parse.qdf"}, {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("EOF terminating literal tokens", diff --git a/qpdf/qtest/qpdf/bad-parse.qdf b/qpdf/qtest/qpdf/bad-parse.qdf new file mode 100644 index 0000000..1da316e --- /dev/null +++ b/qpdf/qtest/qpdf/bad-parse.qdf @@ -0,0 +1,102 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +3 0 obj +<< + /Contents 4 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 6 0 R + >> + /ProcSet 7 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 4 0 +4 0 obj +<< + /Length 5 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +44 +endobj + +%% Original object ID: 6 0 +6 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 5 0 +7 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 8 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000242 00000 n +0000000484 00000 n +0000000583 00000 n +0000000629 00000 n +0000001113 00000 n +trailer << + /Root 1 0 R + /Size 8 + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] +>> +startxref +809 +%%EOF +7 0 obj diff --git a/qpdf/qtest/qpdf/bad39-recover.out b/qpdf/qtest/qpdf/bad39-recover.out new file mode 100644 index 0000000..bf5df4b --- /dev/null +++ b/qpdf/qtest/qpdf/bad39-recover.out @@ -0,0 +1,13 @@ +WARNING: bad39.pdf (object 4 0, offset 385): expected endstream +WARNING: bad39.pdf (object 4 0, offset 341): attempting to recover stream length +WARNING: bad39.pdf (object 4 0, offset 341): unable to recover stream data; treating stream as empty +/QTest is indirect and has type stream (10) +/QTest is a stream. Dictionary: << /Length 44 >> +Raw stream data: + +Uncompressed stream data: + +End of stream data +unparse: 4 0 R +unparseResolved: 4 0 R +test 1 done diff --git a/qpdf/qtest/qpdf/bad39.out b/qpdf/qtest/qpdf/bad39.out new file mode 100644 index 0000000..435797e --- /dev/null +++ b/qpdf/qtest/qpdf/bad39.out @@ -0,0 +1,7 @@ +WARNING: bad39.pdf (object 4 0, offset 385): expected endstream +/QTest is implicit +/QTest is indirect and has type null (2) +/QTest is null +unparse: 4 0 R +unparseResolved: null +test 0 done diff --git a/qpdf/qtest/qpdf/bad39.pdf b/qpdf/qtest/qpdf/bad39.pdf new file mode 100644 index 0000000..d832197 --- /dev/null +++ b/qpdf/qtest/qpdf/bad39.pdf @@ -0,0 +1,81 @@ +%PDF-1.3 +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj + +2 0 obj +<< + /Type /Pages + /Kids [ + 3 0 R + ] + /Count 1 +>> +endobj + +3 0 obj +<< + /Type /Page + /Parent 2 0 R + /MediaBox [0 0 612 792] + /Contents 4 0 R + /Resources << + /ProcSet 5 0 R + /Font << + /F1 6 0 R + >> + >> +>> +endobj + +4 0 obj +<< + /Length 44 +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +enxstream +enxobj + +5 0 obj +[ + /PDF + /Text +] +endobj + +6 0 obj +<< + /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /Winng +>> +endstream +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000063 00000 n +0000000135 00000 n +0000000307 00000 n +0000000403 00000 n +0000000438 00000 n +trailer << + /Size 7 + /Root 1 0 R + /QTest 4 0 R +>> +startxref +556 +%%EOF diff --git a/qpdf/qtest/qpdf/bad39.qdf b/qpdf/qtest/qpdf/bad39.qdf deleted file mode 100644 index 1da316e..0000000 --- a/qpdf/qtest/qpdf/bad39.qdf +++ /dev/null @@ -1,102 +0,0 @@ -%PDF-1.3 -%¿÷¢þ -%QDF-1.0 - -%% Original object ID: 1 0 -1 0 obj -<< - /Pages 2 0 R - /Type /Catalog ->> -endobj - -%% Original object ID: 2 0 -2 0 obj -<< - /Count 1 - /Kids [ - 3 0 R - ] - /Type /Pages ->> -endobj - -%% Page 1 -%% Original object ID: 3 0 -3 0 obj -<< - /Contents 4 0 R - /MediaBox [ - 0 - 0 - 612 - 792 - ] - /Parent 2 0 R - /Resources << - /Font << - /F1 6 0 R - >> - /ProcSet 7 0 R - >> - /Type /Page ->> -endobj - -%% Contents for page 1 -%% Original object ID: 4 0 -4 0 obj -<< - /Length 5 0 R ->> -stream -BT - /F1 24 Tf - 72 720 Td - (Potato) Tj -ET -endstream -endobj - -5 0 obj -44 -endobj - -%% Original object ID: 6 0 -6 0 obj -<< - /BaseFont /Helvetica - /Encoding /WinAnsiEncoding - /Name /F1 - /Subtype /Type1 - /Type /Font ->> -endobj - -%% Original object ID: 5 0 -7 0 obj -[ - /PDF - /Text -] -endobj - -xref -0 8 -0000000000 65535 f -0000000052 00000 n -0000000133 00000 n -0000000242 00000 n -0000000484 00000 n -0000000583 00000 n -0000000629 00000 n -0000001113 00000 n -trailer << - /Root 1 0 R - /Size 8 - /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] ->> -startxref -809 -%%EOF -7 0 obj diff --git a/qpdf/qtest/qpdf/parse-object.out b/qpdf/qtest/qpdf/parse-object.out index de7b42e..47843fd 100644 --- a/qpdf/qtest/qpdf/parse-object.out +++ b/qpdf/qtest/qpdf/parse-object.out @@ -5,7 +5,7 @@ WARNING: parsed object (offset 9): unknown token while reading object; treating WARNING: parsed object: treating unexpected brace token as null WARNING: parsed object: treating unexpected brace token as null WARNING: parsed object: unexpected dictionary close token -WARNING: bad39.qdf (object 7 0, offset 1121): unexpected EOF -WARNING: bad39.qdf (object 7 0, offset 1121): expected endobj -WARNING: bad39.qdf (object 7 0, offset 1121): EOF after endobj +WARNING: bad-parse.qdf (object 7 0, offset 1121): unexpected EOF +WARNING: bad-parse.qdf (object 7 0, offset 1121): expected endobj +WARNING: bad-parse.qdf (object 7 0, offset 1121): EOF after endobj test 31 done -- libgit2 0.21.4