Commit a5d8783f6793de05381fe399d4d0025d480b2aa3

Authored by Jay Berkenbilt
1 parent a7e8b8c7

Improve qpdf --check

Fix exit status for case of errors without warnings, continue after
errors when possible, add test case for parsing a file with content
stream errors on some but not all pages.
ChangeLog
  1 +2013-01-25 Jay Berkenbilt <ejb@ql.org>
  2 +
  3 + * qpdf --check was exiting with status 0 in some rare cases even
  4 + when errors were found. It now always exits with one of the
  5 + document error codes (0 for success, 2 for errors, 3 for warnings).
  6 +
1 2013-01-24 Jay Berkenbilt <ejb@ql.org> 7 2013-01-24 Jay Berkenbilt <ejb@ql.org>
2 8
3 * qpdf --check now does syntactic checks on all pages' content 9 * qpdf --check now does syntactic checks on all pages' content
@@ -5,10 +11,6 @@ @@ -5,10 +11,6 @@
5 errors are still not checked, and there are no plans to add 11 errors are still not checked, and there are no plans to add
6 semantic checks. 12 semantic checks.
7 13
8 - * Bug fix: in versions 4.0.0 and 4.0.1, qpdf --check wasn't doing  
9 - as full of a check for linearized files as for non-linearized  
10 - files.  
11 -  
12 2013-01-22 Jay Berkenbilt <ejb@ql.org> 14 2013-01-22 Jay Berkenbilt <ejb@ql.org>
13 15
14 * Add QPDFObjectHandle::getTypeCode(). This method returns a 16 * Add QPDFObjectHandle::getTypeCode(). This method returns a
libqpdf/QPDFObjectHandle.cc
@@ -1012,10 +1012,10 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, @@ -1012,10 +1012,10 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1012 } 1012 }
1013 else if (! object.isInitialized()) 1013 else if (! object.isInitialized())
1014 { 1014 {
1015 - throw std::logic_error(  
1016 - "INTERNAL ERROR: uninitialized object (token = " +  
1017 - QUtil::int_to_string(token.getType()) +  
1018 - ", " + token.getValue() + ")"); 1015 + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
  1016 + object_description,
  1017 + input->getLastOffset(),
  1018 + "parse error while reading object");
1019 } 1019 }
1020 else 1020 else
1021 { 1021 {
qpdf/qpdf.cc
@@ -1428,7 +1428,11 @@ int main(int argc, char* argv[]) @@ -1428,7 +1428,11 @@ int main(int argc, char* argv[])
1428 } 1428 }
1429 if (check) 1429 if (check)
1430 { 1430 {
1431 - bool okay = false; 1431 + // Code below may set okay to false but not to true.
  1432 + // We assume okay until we prove otherwise but may
  1433 + // continue to perform additional checks after finding
  1434 + // errors.
  1435 + bool okay = true;
1432 std::cout << "checking " << infilename << std::endl; 1436 std::cout << "checking " << infilename << std::endl;
1433 try 1437 try
1434 { 1438 {
@@ -1444,8 +1448,11 @@ int main(int argc, char* argv[]) @@ -1444,8 +1448,11 @@ int main(int argc, char* argv[])
1444 if (pdf.isLinearized()) 1448 if (pdf.isLinearized())
1445 { 1449 {
1446 std::cout << "File is linearized\n"; 1450 std::cout << "File is linearized\n";
1447 - okay = pdf.checkLinearization();  
1448 - // any errors are reported by checkLinearization(). 1451 + if (! pdf.checkLinearization())
  1452 + {
  1453 + // any errors are reported by checkLinearization()
  1454 + okay = false;
  1455 + }
1449 } 1456 }
1450 else 1457 else
1451 { 1458 {
@@ -1453,8 +1460,8 @@ int main(int argc, char* argv[]) @@ -1453,8 +1460,8 @@ int main(int argc, char* argv[])
1453 } 1460 }
1454 1461
1455 // Write the file to nowhere, uncompressing 1462 // Write the file to nowhere, uncompressing
1456 - // streams. This causes full file traversal  
1457 - // and decoding of all streams we can decode. 1463 + // streams. This causes full file traversal and
  1464 + // decoding of all streams we can decode.
1458 QPDFWriter w(pdf); 1465 QPDFWriter w(pdf);
1459 Pl_Discard discard; 1466 Pl_Discard discard;
1460 w.setOutputPipeline(&discard); 1467 w.setOutputPipeline(&discard);
@@ -1464,19 +1471,30 @@ int main(int argc, char* argv[]) @@ -1464,19 +1471,30 @@ int main(int argc, char* argv[])
1464 // Parse all content streams 1471 // Parse all content streams
1465 std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); 1472 std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
1466 DiscardContents discard_contents; 1473 DiscardContents discard_contents;
  1474 + int pageno = 0;
1467 for (std::vector<QPDFObjectHandle>::iterator iter = 1475 for (std::vector<QPDFObjectHandle>::iterator iter =
1468 pages.begin(); 1476 pages.begin();
1469 iter != pages.end(); ++iter) 1477 iter != pages.end(); ++iter)
1470 { 1478 {
1471 - QPDFObjectHandle::parseContentStream(  
1472 - (*iter).getKey("/Contents"), &discard_contents); 1479 + ++pageno;
  1480 + try
  1481 + {
  1482 + QPDFObjectHandle::parseContentStream(
  1483 + (*iter).getKey("/Contents"),
  1484 + &discard_contents);
  1485 + }
  1486 + catch (QPDFExc& e)
  1487 + {
  1488 + okay = false;
  1489 + std::cout << "page " << pageno << ": "
  1490 + << e.what() << std::endl;
  1491 + }
1473 } 1492 }
1474 -  
1475 - okay = true;  
1476 } 1493 }
1477 catch (std::exception& e) 1494 catch (std::exception& e)
1478 { 1495 {
1479 std::cout << e.what() << std::endl; 1496 std::cout << e.what() << std::endl;
  1497 + okay = false;
1480 } 1498 }
1481 if (okay) 1499 if (okay)
1482 { 1500 {
@@ -1493,6 +1511,10 @@ int main(int argc, char* argv[]) @@ -1493,6 +1511,10 @@ int main(int argc, char* argv[])
1493 << std::endl; 1511 << std::endl;
1494 } 1512 }
1495 } 1513 }
  1514 + else
  1515 + {
  1516 + exit(EXIT_ERROR);
  1517 + }
1496 } 1518 }
1497 } 1519 }
1498 else 1520 else
qpdf/qtest/qpdf.test
@@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
199 show_ntests(); 199 show_ntests();
200 # ---------- 200 # ----------
201 $td->notify("--- Miscellaneous Tests ---"); 201 $td->notify("--- Miscellaneous Tests ---");
202 -$n_tests += 59; 202 +$n_tests += 60;
203 203
204 $td->runtest("qpdf version", 204 $td->runtest("qpdf version",
205 {$td->COMMAND => "qpdf --version"}, 205 {$td->COMMAND => "qpdf --version"},
@@ -435,7 +435,7 @@ $td->runtest("EOF terminating literal tokens", @@ -435,7 +435,7 @@ $td->runtest("EOF terminating literal tokens",
435 $td->NORMALIZE_NEWLINES); 435 $td->NORMALIZE_NEWLINES);
436 $td->runtest("EOF reading token", 436 $td->runtest("EOF reading token",
437 {$td->COMMAND => "qpdf --check eof-reading-token.pdf"}, 437 {$td->COMMAND => "qpdf --check eof-reading-token.pdf"},
438 - {$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 0}, 438 + {$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 2},
439 $td->NORMALIZE_NEWLINES); 439 $td->NORMALIZE_NEWLINES);
440 $td->runtest("extra header text", 440 $td->runtest("extra header text",
441 {$td->COMMAND => "test_driver 32 minimal.pdf"}, 441 {$td->COMMAND => "test_driver 32 minimal.pdf"},
@@ -478,6 +478,11 @@ $td->runtest("tokenize content streams", @@ -478,6 +478,11 @@ $td->runtest("tokenize content streams",
478 {$td->FILE => "tokenize-content-streams.out", 478 {$td->FILE => "tokenize-content-streams.out",
479 $td->EXIT_STATUS => 0}, 479 $td->EXIT_STATUS => 0},
480 $td->NORMALIZE_NEWLINES); 480 $td->NORMALIZE_NEWLINES);
  481 +$td->runtest("content stream errors",
  482 + {$td->COMMAND => "qpdf --check content-stream-errors.pdf"},
  483 + {$td->FILE => "content-stream-errors.out",
  484 + $td->EXIT_STATUS => 2},
  485 + $td->NORMALIZE_NEWLINES);
481 486
482 show_ntests(); 487 show_ntests();
483 # ---------- 488 # ----------
qpdf/qtest/qpdf/content-stream-errors.out 0 → 100644
  1 +checking content-stream-errors.pdf
  2 +PDF Version: 1.3
  3 +File is not encrypted
  4 +File is not linearized
  5 +page 1: content stream object 7 0 (content, file position 52): parse error while reading object
  6 +page 3: content stream object 15 0 (stream data, file position 117): EOF found while reading inline image
  7 +page 4: content stream object 19 0 (content, file position 53): parse error while reading object
qpdf/qtest/qpdf/content-stream-errors.pdf 0 → 100644
No preview for this file type