Commit a5d8783f6793de05381fe399d4d0025d480b2aa3
1 parent
a7e8b8c7
Improve qpdf --check
Fix exit status for case of errors without warnings, continue after errors when possible, add test case for parsing a file with content stream errors on some but not all pages.
Showing
6 changed files
with
55 additions
and
19 deletions
ChangeLog
| 1 | +2013-01-25 Jay Berkenbilt <ejb@ql.org> | |
| 2 | + | |
| 3 | + * qpdf --check was exiting with status 0 in some rare cases even | |
| 4 | + when errors were found. It now always exits with one of the | |
| 5 | + document error codes (0 for success, 2 for errors, 3 for warnings). | |
| 6 | + | |
| 1 | 7 | 2013-01-24 Jay Berkenbilt <ejb@ql.org> |
| 2 | 8 | |
| 3 | 9 | * qpdf --check now does syntactic checks on all pages' content |
| ... | ... | @@ -5,10 +11,6 @@ |
| 5 | 11 | errors are still not checked, and there are no plans to add |
| 6 | 12 | semantic checks. |
| 7 | 13 | |
| 8 | - * Bug fix: in versions 4.0.0 and 4.0.1, qpdf --check wasn't doing | |
| 9 | - as full of a check for linearized files as for non-linearized | |
| 10 | - files. | |
| 11 | - | |
| 12 | 14 | 2013-01-22 Jay Berkenbilt <ejb@ql.org> |
| 13 | 15 | |
| 14 | 16 | * Add QPDFObjectHandle::getTypeCode(). This method returns a | ... | ... |
libqpdf/QPDFObjectHandle.cc
| ... | ... | @@ -1012,10 +1012,10 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input, |
| 1012 | 1012 | } |
| 1013 | 1013 | else if (! object.isInitialized()) |
| 1014 | 1014 | { |
| 1015 | - throw std::logic_error( | |
| 1016 | - "INTERNAL ERROR: uninitialized object (token = " + | |
| 1017 | - QUtil::int_to_string(token.getType()) + | |
| 1018 | - ", " + token.getValue() + ")"); | |
| 1015 | + throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), | |
| 1016 | + object_description, | |
| 1017 | + input->getLastOffset(), | |
| 1018 | + "parse error while reading object"); | |
| 1019 | 1019 | } |
| 1020 | 1020 | else |
| 1021 | 1021 | { | ... | ... |
qpdf/qpdf.cc
| ... | ... | @@ -1428,7 +1428,11 @@ int main(int argc, char* argv[]) |
| 1428 | 1428 | } |
| 1429 | 1429 | if (check) |
| 1430 | 1430 | { |
| 1431 | - bool okay = false; | |
| 1431 | + // Code below may set okay to false but not to true. | |
| 1432 | + // We assume okay until we prove otherwise but may | |
| 1433 | + // continue to perform additional checks after finding | |
| 1434 | + // errors. | |
| 1435 | + bool okay = true; | |
| 1432 | 1436 | std::cout << "checking " << infilename << std::endl; |
| 1433 | 1437 | try |
| 1434 | 1438 | { |
| ... | ... | @@ -1444,8 +1448,11 @@ int main(int argc, char* argv[]) |
| 1444 | 1448 | if (pdf.isLinearized()) |
| 1445 | 1449 | { |
| 1446 | 1450 | std::cout << "File is linearized\n"; |
| 1447 | - okay = pdf.checkLinearization(); | |
| 1448 | - // any errors are reported by checkLinearization(). | |
| 1451 | + if (! pdf.checkLinearization()) | |
| 1452 | + { | |
| 1453 | + // any errors are reported by checkLinearization() | |
| 1454 | + okay = false; | |
| 1455 | + } | |
| 1449 | 1456 | } |
| 1450 | 1457 | else |
| 1451 | 1458 | { |
| ... | ... | @@ -1453,8 +1460,8 @@ int main(int argc, char* argv[]) |
| 1453 | 1460 | } |
| 1454 | 1461 | |
| 1455 | 1462 | // Write the file to nowhere, uncompressing |
| 1456 | - // streams. This causes full file traversal | |
| 1457 | - // and decoding of all streams we can decode. | |
| 1463 | + // streams. This causes full file traversal and | |
| 1464 | + // decoding of all streams we can decode. | |
| 1458 | 1465 | QPDFWriter w(pdf); |
| 1459 | 1466 | Pl_Discard discard; |
| 1460 | 1467 | w.setOutputPipeline(&discard); |
| ... | ... | @@ -1464,19 +1471,30 @@ int main(int argc, char* argv[]) |
| 1464 | 1471 | // Parse all content streams |
| 1465 | 1472 | std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); |
| 1466 | 1473 | DiscardContents discard_contents; |
| 1474 | + int pageno = 0; | |
| 1467 | 1475 | for (std::vector<QPDFObjectHandle>::iterator iter = |
| 1468 | 1476 | pages.begin(); |
| 1469 | 1477 | iter != pages.end(); ++iter) |
| 1470 | 1478 | { |
| 1471 | - QPDFObjectHandle::parseContentStream( | |
| 1472 | - (*iter).getKey("/Contents"), &discard_contents); | |
| 1479 | + ++pageno; | |
| 1480 | + try | |
| 1481 | + { | |
| 1482 | + QPDFObjectHandle::parseContentStream( | |
| 1483 | + (*iter).getKey("/Contents"), | |
| 1484 | + &discard_contents); | |
| 1485 | + } | |
| 1486 | + catch (QPDFExc& e) | |
| 1487 | + { | |
| 1488 | + okay = false; | |
| 1489 | + std::cout << "page " << pageno << ": " | |
| 1490 | + << e.what() << std::endl; | |
| 1491 | + } | |
| 1473 | 1492 | } |
| 1474 | - | |
| 1475 | - okay = true; | |
| 1476 | 1493 | } |
| 1477 | 1494 | catch (std::exception& e) |
| 1478 | 1495 | { |
| 1479 | 1496 | std::cout << e.what() << std::endl; |
| 1497 | + okay = false; | |
| 1480 | 1498 | } |
| 1481 | 1499 | if (okay) |
| 1482 | 1500 | { |
| ... | ... | @@ -1493,6 +1511,10 @@ int main(int argc, char* argv[]) |
| 1493 | 1511 | << std::endl; |
| 1494 | 1512 | } |
| 1495 | 1513 | } |
| 1514 | + else | |
| 1515 | + { | |
| 1516 | + exit(EXIT_ERROR); | |
| 1517 | + } | |
| 1496 | 1518 | } |
| 1497 | 1519 | } |
| 1498 | 1520 | else | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", |
| 199 | 199 | show_ntests(); |
| 200 | 200 | # ---------- |
| 201 | 201 | $td->notify("--- Miscellaneous Tests ---"); |
| 202 | -$n_tests += 59; | |
| 202 | +$n_tests += 60; | |
| 203 | 203 | |
| 204 | 204 | $td->runtest("qpdf version", |
| 205 | 205 | {$td->COMMAND => "qpdf --version"}, |
| ... | ... | @@ -435,7 +435,7 @@ $td->runtest("EOF terminating literal tokens", |
| 435 | 435 | $td->NORMALIZE_NEWLINES); |
| 436 | 436 | $td->runtest("EOF reading token", |
| 437 | 437 | {$td->COMMAND => "qpdf --check eof-reading-token.pdf"}, |
| 438 | - {$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 0}, | |
| 438 | + {$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 2}, | |
| 439 | 439 | $td->NORMALIZE_NEWLINES); |
| 440 | 440 | $td->runtest("extra header text", |
| 441 | 441 | {$td->COMMAND => "test_driver 32 minimal.pdf"}, |
| ... | ... | @@ -478,6 +478,11 @@ $td->runtest("tokenize content streams", |
| 478 | 478 | {$td->FILE => "tokenize-content-streams.out", |
| 479 | 479 | $td->EXIT_STATUS => 0}, |
| 480 | 480 | $td->NORMALIZE_NEWLINES); |
| 481 | +$td->runtest("content stream errors", | |
| 482 | + {$td->COMMAND => "qpdf --check content-stream-errors.pdf"}, | |
| 483 | + {$td->FILE => "content-stream-errors.out", | |
| 484 | + $td->EXIT_STATUS => 2}, | |
| 485 | + $td->NORMALIZE_NEWLINES); | |
| 481 | 486 | |
| 482 | 487 | show_ntests(); |
| 483 | 488 | # ---------- | ... | ... |
qpdf/qtest/qpdf/content-stream-errors.out
0 → 100644
| 1 | +checking content-stream-errors.pdf | |
| 2 | +PDF Version: 1.3 | |
| 3 | +File is not encrypted | |
| 4 | +File is not linearized | |
| 5 | +page 1: content stream object 7 0 (content, file position 52): parse error while reading object | |
| 6 | +page 3: content stream object 15 0 (stream data, file position 117): EOF found while reading inline image | |
| 7 | +page 4: content stream object 19 0 (content, file position 53): parse error while reading object | ... | ... |
qpdf/qtest/qpdf/content-stream-errors.pdf
0 → 100644
No preview for this file type