Commit 885b8781cccdc9d4402af58176e826a354b5ef7a
1 parent
570db9b6
Allow --check to coexist with and precede other operations (fixes #42)
Showing
6 changed files
with
243 additions
and
93 deletions
ChangeLog
| 1 | 2017-07-29 Jay Berkenbilt <ejb@ql.org> | 1 | 2017-07-29 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | ||
| 3 | + * When passing multiple inspection arguments, run --check first, | ||
| 4 | + and defer exit until after all the checks have been run. This | ||
| 5 | + makes it possible to force operations such as --show-xref to be | ||
| 6 | + delayed until after recovery attempts have been made. For example, | ||
| 7 | + if you have a file with a syntactically valid xref table that has | ||
| 8 | + some offsets that are incorrect, running qpdf --check --show-xref | ||
| 9 | + on that file will first recover the xref and then dump the | ||
| 10 | + recovered xref, while just running qpdf --show-xref will show the | ||
| 11 | + xref table as present in the file. Fixes #42. | ||
| 12 | + | ||
| 3 | * When recovering stream length, indicate the recovered length. | 13 | * When recovering stream length, indicate the recovered length. |
| 4 | Fixes #44. | 14 | Fixes #44. |
| 5 | 15 |
qpdf/qpdf.cc
| @@ -1383,6 +1383,97 @@ int main(int argc, char* argv[]) | @@ -1383,6 +1383,97 @@ int main(int argc, char* argv[]) | ||
| 1383 | } | 1383 | } |
| 1384 | if (outfilename == 0) | 1384 | if (outfilename == 0) |
| 1385 | { | 1385 | { |
| 1386 | + int exit_code = 0; | ||
| 1387 | + if (check) | ||
| 1388 | + { | ||
| 1389 | + // Code below may set okay to false but not to true. | ||
| 1390 | + // We assume okay until we prove otherwise but may | ||
| 1391 | + // continue to perform additional checks after finding | ||
| 1392 | + // errors. | ||
| 1393 | + bool okay = true; | ||
| 1394 | + std::cout << "checking " << infilename << std::endl; | ||
| 1395 | + try | ||
| 1396 | + { | ||
| 1397 | + int extension_level = pdf.getExtensionLevel(); | ||
| 1398 | + std::cout << "PDF Version: " << pdf.getPDFVersion(); | ||
| 1399 | + if (extension_level > 0) | ||
| 1400 | + { | ||
| 1401 | + std::cout << " extension level " | ||
| 1402 | + << pdf.getExtensionLevel(); | ||
| 1403 | + } | ||
| 1404 | + std::cout << std::endl; | ||
| 1405 | + ::show_encryption(pdf); | ||
| 1406 | + if (pdf.isLinearized()) | ||
| 1407 | + { | ||
| 1408 | + std::cout << "File is linearized\n"; | ||
| 1409 | + if (! pdf.checkLinearization()) | ||
| 1410 | + { | ||
| 1411 | + // any errors are reported by checkLinearization() | ||
| 1412 | + okay = false; | ||
| 1413 | + } | ||
| 1414 | + } | ||
| 1415 | + else | ||
| 1416 | + { | ||
| 1417 | + std::cout << "File is not linearized\n"; | ||
| 1418 | + } | ||
| 1419 | + | ||
| 1420 | + // Write the file to nowhere, uncompressing | ||
| 1421 | + // streams. This causes full file traversal and | ||
| 1422 | + // decoding of all streams we can decode. | ||
| 1423 | + QPDFWriter w(pdf); | ||
| 1424 | + Pl_Discard discard; | ||
| 1425 | + w.setOutputPipeline(&discard); | ||
| 1426 | + w.setStreamDataMode(qpdf_s_uncompress); | ||
| 1427 | + w.write(); | ||
| 1428 | + | ||
| 1429 | + // Parse all content streams | ||
| 1430 | + std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); | ||
| 1431 | + DiscardContents discard_contents; | ||
| 1432 | + int pageno = 0; | ||
| 1433 | + for (std::vector<QPDFObjectHandle>::iterator iter = | ||
| 1434 | + pages.begin(); | ||
| 1435 | + iter != pages.end(); ++iter) | ||
| 1436 | + { | ||
| 1437 | + ++pageno; | ||
| 1438 | + try | ||
| 1439 | + { | ||
| 1440 | + QPDFObjectHandle::parseContentStream( | ||
| 1441 | + (*iter).getKey("/Contents"), | ||
| 1442 | + &discard_contents); | ||
| 1443 | + } | ||
| 1444 | + catch (QPDFExc& e) | ||
| 1445 | + { | ||
| 1446 | + okay = false; | ||
| 1447 | + std::cout << "page " << pageno << ": " | ||
| 1448 | + << e.what() << std::endl; | ||
| 1449 | + } | ||
| 1450 | + } | ||
| 1451 | + } | ||
| 1452 | + catch (std::exception& e) | ||
| 1453 | + { | ||
| 1454 | + std::cout << e.what() << std::endl; | ||
| 1455 | + okay = false; | ||
| 1456 | + } | ||
| 1457 | + if (okay) | ||
| 1458 | + { | ||
| 1459 | + if (! pdf.getWarnings().empty()) | ||
| 1460 | + { | ||
| 1461 | + exit_code = EXIT_WARNING; | ||
| 1462 | + } | ||
| 1463 | + else | ||
| 1464 | + { | ||
| 1465 | + std::cout << "No syntax or stream encoding errors" | ||
| 1466 | + << " found; the file may still contain" | ||
| 1467 | + << std::endl | ||
| 1468 | + << "errors that qpdf cannot detect" | ||
| 1469 | + << std::endl; | ||
| 1470 | + } | ||
| 1471 | + } | ||
| 1472 | + else | ||
| 1473 | + { | ||
| 1474 | + exit_code = EXIT_ERROR; | ||
| 1475 | + } | ||
| 1476 | + } | ||
| 1386 | if (show_npages) | 1477 | if (show_npages) |
| 1387 | { | 1478 | { |
| 1388 | QTC::TC("qpdf", "qpdf npages"); | 1479 | QTC::TC("qpdf", "qpdf npages"); |
| @@ -1402,7 +1493,7 @@ int main(int argc, char* argv[]) | @@ -1402,7 +1493,7 @@ int main(int argc, char* argv[]) | ||
| 1402 | } | 1493 | } |
| 1403 | else | 1494 | else |
| 1404 | { | 1495 | { |
| 1405 | - exit(EXIT_ERROR); | 1496 | + exit_code = EXIT_ERROR; |
| 1406 | } | 1497 | } |
| 1407 | } | 1498 | } |
| 1408 | if (show_linearization) | 1499 | if (show_linearization) |
| @@ -1435,7 +1526,7 @@ int main(int argc, char* argv[]) | @@ -1435,7 +1526,7 @@ int main(int argc, char* argv[]) | ||
| 1435 | QTC::TC("qpdf", "qpdf unable to filter"); | 1526 | QTC::TC("qpdf", "qpdf unable to filter"); |
| 1436 | std::cerr << "Unable to filter stream data." | 1527 | std::cerr << "Unable to filter stream data." |
| 1437 | << std::endl; | 1528 | << std::endl; |
| 1438 | - exit(EXIT_ERROR); | 1529 | + exit_code = EXIT_ERROR; |
| 1439 | } | 1530 | } |
| 1440 | else | 1531 | else |
| 1441 | { | 1532 | { |
| @@ -1512,96 +1603,10 @@ int main(int argc, char* argv[]) | @@ -1512,96 +1603,10 @@ int main(int argc, char* argv[]) | ||
| 1512 | } | 1603 | } |
| 1513 | } | 1604 | } |
| 1514 | } | 1605 | } |
| 1515 | - if (check) | ||
| 1516 | - { | ||
| 1517 | - // Code below may set okay to false but not to true. | ||
| 1518 | - // We assume okay until we prove otherwise but may | ||
| 1519 | - // continue to perform additional checks after finding | ||
| 1520 | - // errors. | ||
| 1521 | - bool okay = true; | ||
| 1522 | - std::cout << "checking " << infilename << std::endl; | ||
| 1523 | - try | ||
| 1524 | - { | ||
| 1525 | - int extension_level = pdf.getExtensionLevel(); | ||
| 1526 | - std::cout << "PDF Version: " << pdf.getPDFVersion(); | ||
| 1527 | - if (extension_level > 0) | ||
| 1528 | - { | ||
| 1529 | - std::cout << " extension level " | ||
| 1530 | - << pdf.getExtensionLevel(); | ||
| 1531 | - } | ||
| 1532 | - std::cout << std::endl; | ||
| 1533 | - ::show_encryption(pdf); | ||
| 1534 | - if (pdf.isLinearized()) | ||
| 1535 | - { | ||
| 1536 | - std::cout << "File is linearized\n"; | ||
| 1537 | - if (! pdf.checkLinearization()) | ||
| 1538 | - { | ||
| 1539 | - // any errors are reported by checkLinearization() | ||
| 1540 | - okay = false; | ||
| 1541 | - } | ||
| 1542 | - } | ||
| 1543 | - else | ||
| 1544 | - { | ||
| 1545 | - std::cout << "File is not linearized\n"; | ||
| 1546 | - } | ||
| 1547 | - | ||
| 1548 | - // Write the file no nowhere, uncompressing | ||
| 1549 | - // streams. This causes full file traversal and | ||
| 1550 | - // decoding of all streams we can decode. | ||
| 1551 | - QPDFWriter w(pdf); | ||
| 1552 | - Pl_Discard discard; | ||
| 1553 | - w.setOutputPipeline(&discard); | ||
| 1554 | - w.setStreamDataMode(qpdf_s_uncompress); | ||
| 1555 | - w.write(); | ||
| 1556 | - | ||
| 1557 | - // Parse all content streams | ||
| 1558 | - std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); | ||
| 1559 | - DiscardContents discard_contents; | ||
| 1560 | - int pageno = 0; | ||
| 1561 | - for (std::vector<QPDFObjectHandle>::iterator iter = | ||
| 1562 | - pages.begin(); | ||
| 1563 | - iter != pages.end(); ++iter) | ||
| 1564 | - { | ||
| 1565 | - ++pageno; | ||
| 1566 | - try | ||
| 1567 | - { | ||
| 1568 | - QPDFObjectHandle::parseContentStream( | ||
| 1569 | - (*iter).getKey("/Contents"), | ||
| 1570 | - &discard_contents); | ||
| 1571 | - } | ||
| 1572 | - catch (QPDFExc& e) | ||
| 1573 | - { | ||
| 1574 | - okay = false; | ||
| 1575 | - std::cout << "page " << pageno << ": " | ||
| 1576 | - << e.what() << std::endl; | ||
| 1577 | - } | ||
| 1578 | - } | ||
| 1579 | - } | ||
| 1580 | - catch (std::exception& e) | ||
| 1581 | - { | ||
| 1582 | - std::cout << e.what() << std::endl; | ||
| 1583 | - okay = false; | ||
| 1584 | - } | ||
| 1585 | - if (okay) | ||
| 1586 | - { | ||
| 1587 | - if (! pdf.getWarnings().empty()) | ||
| 1588 | - { | ||
| 1589 | - exit(EXIT_WARNING); | ||
| 1590 | - } | ||
| 1591 | - else | ||
| 1592 | - { | ||
| 1593 | - std::cout << "No syntax or stream encoding errors" | ||
| 1594 | - << " found; the file may still contain" | ||
| 1595 | - << std::endl | ||
| 1596 | - << "errors that qpdf cannot detect" | ||
| 1597 | - << std::endl; | ||
| 1598 | - } | ||
| 1599 | - } | ||
| 1600 | - else | ||
| 1601 | - { | ||
| 1602 | - exit(EXIT_ERROR); | ||
| 1603 | - } | ||
| 1604 | - } | 1606 | + if (exit_code) |
| 1607 | + { | ||
| 1608 | + exit(exit_code); | ||
| 1609 | + } | ||
| 1605 | } | 1610 | } |
| 1606 | else | 1611 | else |
| 1607 | { | 1612 | { |
qpdf/qtest/qpdf.test
| @@ -206,7 +206,7 @@ $td->runtest("remove page we don't have", | @@ -206,7 +206,7 @@ $td->runtest("remove page we don't have", | ||
| 206 | show_ntests(); | 206 | show_ntests(); |
| 207 | # ---------- | 207 | # ---------- |
| 208 | $td->notify("--- Miscellaneous Tests ---"); | 208 | $td->notify("--- Miscellaneous Tests ---"); |
| 209 | -$n_tests += 91; | 209 | +$n_tests += 93; |
| 210 | 210 | ||
| 211 | $td->runtest("qpdf version", | 211 | $td->runtest("qpdf version", |
| 212 | {$td->COMMAND => "qpdf --version"}, | 212 | {$td->COMMAND => "qpdf --version"}, |
| @@ -628,6 +628,19 @@ $td->runtest("check output", | @@ -628,6 +628,19 @@ $td->runtest("check output", | ||
| 628 | {$td->FILE => "a.pdf"}, | 628 | {$td->FILE => "a.pdf"}, |
| 629 | {$td->FILE => "newline-before-endstream.pdf"}); | 629 | {$td->FILE => "newline-before-endstream.pdf"}); |
| 630 | 630 | ||
| 631 | +# Demonstrate show-xref after check and not after check to illustrate | ||
| 632 | +# that it can dump the real xref or the recovered xref. | ||
| 633 | +$td->runtest("dump bad xref", | ||
| 634 | + {$td->COMMAND => "qpdf --show-xref bad-xref-entry.pdf"}, | ||
| 635 | + {$td->FILE => "bad-xref-entry.out", | ||
| 636 | + $td->EXIT_STATUS => 0}, | ||
| 637 | + $td->NORMALIZE_NEWLINES); | ||
| 638 | +$td->runtest("dump corrected bad xref", | ||
| 639 | + {$td->COMMAND => "qpdf --check --show-xref bad-xref-entry.pdf"}, | ||
| 640 | + {$td->FILE => "bad-xref-entry-corrected.out", | ||
| 641 | + $td->EXIT_STATUS => 3}, | ||
| 642 | + $td->NORMALIZE_NEWLINES); | ||
| 643 | + | ||
| 631 | 644 | ||
| 632 | show_ntests(); | 645 | show_ntests(); |
| 633 | # ---------- | 646 | # ---------- |
qpdf/qtest/qpdf/bad-xref-entry-corrected.out
0 → 100644
| 1 | +checking bad-xref-entry.pdf | ||
| 2 | +PDF Version: 1.3 | ||
| 3 | +File is not encrypted | ||
| 4 | +File is not linearized | ||
| 5 | +WARNING: bad-xref-entry.pdf: file is damaged | ||
| 6 | +WARNING: bad-xref-entry.pdf (object 5 0, file position 580): expected n n obj | ||
| 7 | +WARNING: bad-xref-entry.pdf: Attempting to reconstruct cross-reference table | ||
| 8 | +1/0: uncompressed; offset = 52 | ||
| 9 | +2/0: uncompressed; offset = 133 | ||
| 10 | +3/0: uncompressed; offset = 242 | ||
| 11 | +4/0: uncompressed; offset = 484 | ||
| 12 | +5/0: uncompressed; offset = 583 | ||
| 13 | +6/0: uncompressed; offset = 629 | ||
| 14 | +7/0: uncompressed; offset = 774 |
qpdf/qtest/qpdf/bad-xref-entry.out
0 → 100644
qpdf/qtest/qpdf/bad-xref-entry.pdf
0 → 100644
| 1 | +%PDF-1.3 | ||
| 2 | +%¿÷¢þ | ||
| 3 | +%QDF-1.0 | ||
| 4 | + | ||
| 5 | +%% Original object ID: 1 0 | ||
| 6 | +1 0 obj | ||
| 7 | +<< | ||
| 8 | + /Pages 2 0 R | ||
| 9 | + /Type /Catalog | ||
| 10 | +>> | ||
| 11 | +endobj | ||
| 12 | + | ||
| 13 | +%% Original object ID: 2 0 | ||
| 14 | +2 0 obj | ||
| 15 | +<< | ||
| 16 | + /Count 1 | ||
| 17 | + /Kids [ | ||
| 18 | + 3 0 R | ||
| 19 | + ] | ||
| 20 | + /Type /Pages | ||
| 21 | +>> | ||
| 22 | +endobj | ||
| 23 | + | ||
| 24 | +%% Page 1 | ||
| 25 | +%% Original object ID: 3 0 | ||
| 26 | +3 0 obj | ||
| 27 | +<< | ||
| 28 | + /Contents 4 0 R | ||
| 29 | + /MediaBox [ | ||
| 30 | + 0 | ||
| 31 | + 0 | ||
| 32 | + 612 | ||
| 33 | + 792 | ||
| 34 | + ] | ||
| 35 | + /Parent 2 0 R | ||
| 36 | + /Resources << | ||
| 37 | + /Font << | ||
| 38 | + /F1 6 0 R | ||
| 39 | + >> | ||
| 40 | + /ProcSet 7 0 R | ||
| 41 | + >> | ||
| 42 | + /Type /Page | ||
| 43 | +>> | ||
| 44 | +endobj | ||
| 45 | + | ||
| 46 | +%% Contents for page 1 | ||
| 47 | +%% Original object ID: 4 0 | ||
| 48 | +4 0 obj | ||
| 49 | +<< | ||
| 50 | + /Length 5 0 R | ||
| 51 | +>> | ||
| 52 | +stream | ||
| 53 | +BT | ||
| 54 | + /F1 24 Tf | ||
| 55 | + 72 720 Td | ||
| 56 | + (Potato) Tj | ||
| 57 | +ET | ||
| 58 | +endstream | ||
| 59 | +endobj | ||
| 60 | + | ||
| 61 | +5 0 obj | ||
| 62 | +44 | ||
| 63 | +endobj | ||
| 64 | + | ||
| 65 | +%% Original object ID: 6 0 | ||
| 66 | +6 0 obj | ||
| 67 | +<< | ||
| 68 | + /BaseFont /Helvetica | ||
| 69 | + /Encoding /WinAnsiEncoding | ||
| 70 | + /Name /F1 | ||
| 71 | + /Subtype /Type1 | ||
| 72 | + /Type /Font | ||
| 73 | +>> | ||
| 74 | +endobj | ||
| 75 | + | ||
| 76 | +%% Original object ID: 5 0 | ||
| 77 | +7 0 obj | ||
| 78 | +[ | ||
| 79 | + /PDF | ||
| 80 | + /Text | ||
| 81 | +] | ||
| 82 | +endobj | ||
| 83 | + | ||
| 84 | +xref | ||
| 85 | +0 8 | ||
| 86 | +0000000000 65535 f | ||
| 87 | +0000000052 00000 n | ||
| 88 | +0000000133 00000 n | ||
| 89 | +0000000242 00000 n | ||
| 90 | +0000000484 00000 n | ||
| 91 | +0000000580 00000 n | ||
| 92 | +0000000629 00000 n | ||
| 93 | +0000000774 00000 n | ||
| 94 | +trailer << | ||
| 95 | + /Root 1 0 R | ||
| 96 | + /Size 8 | ||
| 97 | + /ID [<2e68fbddcf3742fa64db89e66acd25d9><2e68fbddcf3742fa64db89e66acd25d9>] | ||
| 98 | +>> | ||
| 99 | +startxref | ||
| 100 | +809 | ||
| 101 | +%%EOF |