Commit 885b8781cccdc9d4402af58176e826a354b5ef7a
1 parent
570db9b6
Allow --check to coexist with and precede other operations (fixes #42)
Showing
6 changed files
with
243 additions
and
93 deletions
ChangeLog
| 1 | 1 | 2017-07-29 Jay Berkenbilt <ejb@ql.org> |
| 2 | 2 | |
| 3 | + * When passing multiple inspection arguments, run --check first, | |
| 4 | + and defer exit until after all the checks have been run. This | |
| 5 | + makes it possible to force operations such as --show-xref to be | |
| 6 | + delayed until after recovery attempts have been made. For example, | |
| 7 | + if you have a file with a syntactically valid xref table that has | |
| 8 | + some offsets that are incorrect, running qpdf --check --show-xref | |
| 9 | + on that file will first recover the xref and then dump the | |
| 10 | + recovered xref, while just running qpdf --show-xref will show the | |
| 11 | + xref table as present in the file. Fixes #42. | |
| 12 | + | |
| 3 | 13 | * When recovering stream length, indicate the recovered length. |
| 4 | 14 | Fixes #44. |
| 5 | 15 | ... | ... |
qpdf/qpdf.cc
| ... | ... | @@ -1383,6 +1383,97 @@ int main(int argc, char* argv[]) |
| 1383 | 1383 | } |
| 1384 | 1384 | if (outfilename == 0) |
| 1385 | 1385 | { |
| 1386 | + int exit_code = 0; | |
| 1387 | + if (check) | |
| 1388 | + { | |
| 1389 | + // Code below may set okay to false but not to true. | |
| 1390 | + // We assume okay until we prove otherwise but may | |
| 1391 | + // continue to perform additional checks after finding | |
| 1392 | + // errors. | |
| 1393 | + bool okay = true; | |
| 1394 | + std::cout << "checking " << infilename << std::endl; | |
| 1395 | + try | |
| 1396 | + { | |
| 1397 | + int extension_level = pdf.getExtensionLevel(); | |
| 1398 | + std::cout << "PDF Version: " << pdf.getPDFVersion(); | |
| 1399 | + if (extension_level > 0) | |
| 1400 | + { | |
| 1401 | + std::cout << " extension level " | |
| 1402 | + << pdf.getExtensionLevel(); | |
| 1403 | + } | |
| 1404 | + std::cout << std::endl; | |
| 1405 | + ::show_encryption(pdf); | |
| 1406 | + if (pdf.isLinearized()) | |
| 1407 | + { | |
| 1408 | + std::cout << "File is linearized\n"; | |
| 1409 | + if (! pdf.checkLinearization()) | |
| 1410 | + { | |
| 1411 | + // any errors are reported by checkLinearization() | |
| 1412 | + okay = false; | |
| 1413 | + } | |
| 1414 | + } | |
| 1415 | + else | |
| 1416 | + { | |
| 1417 | + std::cout << "File is not linearized\n"; | |
| 1418 | + } | |
| 1419 | + | |
| 1420 | + // Write the file to nowhere, uncompressing | |
| 1421 | + // streams. This causes full file traversal and | |
| 1422 | + // decoding of all streams we can decode. | |
| 1423 | + QPDFWriter w(pdf); | |
| 1424 | + Pl_Discard discard; | |
| 1425 | + w.setOutputPipeline(&discard); | |
| 1426 | + w.setStreamDataMode(qpdf_s_uncompress); | |
| 1427 | + w.write(); | |
| 1428 | + | |
| 1429 | + // Parse all content streams | |
| 1430 | + std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); | |
| 1431 | + DiscardContents discard_contents; | |
| 1432 | + int pageno = 0; | |
| 1433 | + for (std::vector<QPDFObjectHandle>::iterator iter = | |
| 1434 | + pages.begin(); | |
| 1435 | + iter != pages.end(); ++iter) | |
| 1436 | + { | |
| 1437 | + ++pageno; | |
| 1438 | + try | |
| 1439 | + { | |
| 1440 | + QPDFObjectHandle::parseContentStream( | |
| 1441 | + (*iter).getKey("/Contents"), | |
| 1442 | + &discard_contents); | |
| 1443 | + } | |
| 1444 | + catch (QPDFExc& e) | |
| 1445 | + { | |
| 1446 | + okay = false; | |
| 1447 | + std::cout << "page " << pageno << ": " | |
| 1448 | + << e.what() << std::endl; | |
| 1449 | + } | |
| 1450 | + } | |
| 1451 | + } | |
| 1452 | + catch (std::exception& e) | |
| 1453 | + { | |
| 1454 | + std::cout << e.what() << std::endl; | |
| 1455 | + okay = false; | |
| 1456 | + } | |
| 1457 | + if (okay) | |
| 1458 | + { | |
| 1459 | + if (! pdf.getWarnings().empty()) | |
| 1460 | + { | |
| 1461 | + exit_code = EXIT_WARNING; | |
| 1462 | + } | |
| 1463 | + else | |
| 1464 | + { | |
| 1465 | + std::cout << "No syntax or stream encoding errors" | |
| 1466 | + << " found; the file may still contain" | |
| 1467 | + << std::endl | |
| 1468 | + << "errors that qpdf cannot detect" | |
| 1469 | + << std::endl; | |
| 1470 | + } | |
| 1471 | + } | |
| 1472 | + else | |
| 1473 | + { | |
| 1474 | + exit_code = EXIT_ERROR; | |
| 1475 | + } | |
| 1476 | + } | |
| 1386 | 1477 | if (show_npages) |
| 1387 | 1478 | { |
| 1388 | 1479 | QTC::TC("qpdf", "qpdf npages"); |
| ... | ... | @@ -1402,7 +1493,7 @@ int main(int argc, char* argv[]) |
| 1402 | 1493 | } |
| 1403 | 1494 | else |
| 1404 | 1495 | { |
| 1405 | - exit(EXIT_ERROR); | |
| 1496 | + exit_code = EXIT_ERROR; | |
| 1406 | 1497 | } |
| 1407 | 1498 | } |
| 1408 | 1499 | if (show_linearization) |
| ... | ... | @@ -1435,7 +1526,7 @@ int main(int argc, char* argv[]) |
| 1435 | 1526 | QTC::TC("qpdf", "qpdf unable to filter"); |
| 1436 | 1527 | std::cerr << "Unable to filter stream data." |
| 1437 | 1528 | << std::endl; |
| 1438 | - exit(EXIT_ERROR); | |
| 1529 | + exit_code = EXIT_ERROR; | |
| 1439 | 1530 | } |
| 1440 | 1531 | else |
| 1441 | 1532 | { |
| ... | ... | @@ -1512,96 +1603,10 @@ int main(int argc, char* argv[]) |
| 1512 | 1603 | } |
| 1513 | 1604 | } |
| 1514 | 1605 | } |
| 1515 | - if (check) | |
| 1516 | - { | |
| 1517 | - // Code below may set okay to false but not to true. | |
| 1518 | - // We assume okay until we prove otherwise but may | |
| 1519 | - // continue to perform additional checks after finding | |
| 1520 | - // errors. | |
| 1521 | - bool okay = true; | |
| 1522 | - std::cout << "checking " << infilename << std::endl; | |
| 1523 | - try | |
| 1524 | - { | |
| 1525 | - int extension_level = pdf.getExtensionLevel(); | |
| 1526 | - std::cout << "PDF Version: " << pdf.getPDFVersion(); | |
| 1527 | - if (extension_level > 0) | |
| 1528 | - { | |
| 1529 | - std::cout << " extension level " | |
| 1530 | - << pdf.getExtensionLevel(); | |
| 1531 | - } | |
| 1532 | - std::cout << std::endl; | |
| 1533 | - ::show_encryption(pdf); | |
| 1534 | - if (pdf.isLinearized()) | |
| 1535 | - { | |
| 1536 | - std::cout << "File is linearized\n"; | |
| 1537 | - if (! pdf.checkLinearization()) | |
| 1538 | - { | |
| 1539 | - // any errors are reported by checkLinearization() | |
| 1540 | - okay = false; | |
| 1541 | - } | |
| 1542 | - } | |
| 1543 | - else | |
| 1544 | - { | |
| 1545 | - std::cout << "File is not linearized\n"; | |
| 1546 | - } | |
| 1547 | - | |
| 1548 | - // Write the file no nowhere, uncompressing | |
| 1549 | - // streams. This causes full file traversal and | |
| 1550 | - // decoding of all streams we can decode. | |
| 1551 | - QPDFWriter w(pdf); | |
| 1552 | - Pl_Discard discard; | |
| 1553 | - w.setOutputPipeline(&discard); | |
| 1554 | - w.setStreamDataMode(qpdf_s_uncompress); | |
| 1555 | - w.write(); | |
| 1556 | - | |
| 1557 | - // Parse all content streams | |
| 1558 | - std::vector<QPDFObjectHandle> pages = pdf.getAllPages(); | |
| 1559 | - DiscardContents discard_contents; | |
| 1560 | - int pageno = 0; | |
| 1561 | - for (std::vector<QPDFObjectHandle>::iterator iter = | |
| 1562 | - pages.begin(); | |
| 1563 | - iter != pages.end(); ++iter) | |
| 1564 | - { | |
| 1565 | - ++pageno; | |
| 1566 | - try | |
| 1567 | - { | |
| 1568 | - QPDFObjectHandle::parseContentStream( | |
| 1569 | - (*iter).getKey("/Contents"), | |
| 1570 | - &discard_contents); | |
| 1571 | - } | |
| 1572 | - catch (QPDFExc& e) | |
| 1573 | - { | |
| 1574 | - okay = false; | |
| 1575 | - std::cout << "page " << pageno << ": " | |
| 1576 | - << e.what() << std::endl; | |
| 1577 | - } | |
| 1578 | - } | |
| 1579 | - } | |
| 1580 | - catch (std::exception& e) | |
| 1581 | - { | |
| 1582 | - std::cout << e.what() << std::endl; | |
| 1583 | - okay = false; | |
| 1584 | - } | |
| 1585 | - if (okay) | |
| 1586 | - { | |
| 1587 | - if (! pdf.getWarnings().empty()) | |
| 1588 | - { | |
| 1589 | - exit(EXIT_WARNING); | |
| 1590 | - } | |
| 1591 | - else | |
| 1592 | - { | |
| 1593 | - std::cout << "No syntax or stream encoding errors" | |
| 1594 | - << " found; the file may still contain" | |
| 1595 | - << std::endl | |
| 1596 | - << "errors that qpdf cannot detect" | |
| 1597 | - << std::endl; | |
| 1598 | - } | |
| 1599 | - } | |
| 1600 | - else | |
| 1601 | - { | |
| 1602 | - exit(EXIT_ERROR); | |
| 1603 | - } | |
| 1604 | - } | |
| 1606 | + if (exit_code) | |
| 1607 | + { | |
| 1608 | + exit(exit_code); | |
| 1609 | + } | |
| 1605 | 1610 | } |
| 1606 | 1611 | else |
| 1607 | 1612 | { | ... | ... |
qpdf/qtest/qpdf.test
| ... | ... | @@ -206,7 +206,7 @@ $td->runtest("remove page we don't have", |
| 206 | 206 | show_ntests(); |
| 207 | 207 | # ---------- |
| 208 | 208 | $td->notify("--- Miscellaneous Tests ---"); |
| 209 | -$n_tests += 91; | |
| 209 | +$n_tests += 93; | |
| 210 | 210 | |
| 211 | 211 | $td->runtest("qpdf version", |
| 212 | 212 | {$td->COMMAND => "qpdf --version"}, |
| ... | ... | @@ -628,6 +628,19 @@ $td->runtest("check output", |
| 628 | 628 | {$td->FILE => "a.pdf"}, |
| 629 | 629 | {$td->FILE => "newline-before-endstream.pdf"}); |
| 630 | 630 | |
| 631 | +# Demonstrate show-xref after check and not after check to illustrate | |
| 632 | +# that it can dump the real xref or the recovered xref. | |
| 633 | +$td->runtest("dump bad xref", | |
| 634 | + {$td->COMMAND => "qpdf --show-xref bad-xref-entry.pdf"}, | |
| 635 | + {$td->FILE => "bad-xref-entry.out", | |
| 636 | + $td->EXIT_STATUS => 0}, | |
| 637 | + $td->NORMALIZE_NEWLINES); | |
| 638 | +$td->runtest("dump corrected bad xref", | |
| 639 | + {$td->COMMAND => "qpdf --check --show-xref bad-xref-entry.pdf"}, | |
| 640 | + {$td->FILE => "bad-xref-entry-corrected.out", | |
| 641 | + $td->EXIT_STATUS => 3}, | |
| 642 | + $td->NORMALIZE_NEWLINES); | |
| 643 | + | |
| 631 | 644 | |
| 632 | 645 | show_ntests(); |
| 633 | 646 | # ---------- | ... | ... |
qpdf/qtest/qpdf/bad-xref-entry-corrected.out
0 → 100644
| 1 | +checking bad-xref-entry.pdf | |
| 2 | +PDF Version: 1.3 | |
| 3 | +File is not encrypted | |
| 4 | +File is not linearized | |
| 5 | +WARNING: bad-xref-entry.pdf: file is damaged | |
| 6 | +WARNING: bad-xref-entry.pdf (object 5 0, file position 580): expected n n obj | |
| 7 | +WARNING: bad-xref-entry.pdf: Attempting to reconstruct cross-reference table | |
| 8 | +1/0: uncompressed; offset = 52 | |
| 9 | +2/0: uncompressed; offset = 133 | |
| 10 | +3/0: uncompressed; offset = 242 | |
| 11 | +4/0: uncompressed; offset = 484 | |
| 12 | +5/0: uncompressed; offset = 583 | |
| 13 | +6/0: uncompressed; offset = 629 | |
| 14 | +7/0: uncompressed; offset = 774 | ... | ... |
qpdf/qtest/qpdf/bad-xref-entry.out
0 → 100644
qpdf/qtest/qpdf/bad-xref-entry.pdf
0 → 100644
| 1 | +%PDF-1.3 | |
| 2 | +%¿÷¢þ | |
| 3 | +%QDF-1.0 | |
| 4 | + | |
| 5 | +%% Original object ID: 1 0 | |
| 6 | +1 0 obj | |
| 7 | +<< | |
| 8 | + /Pages 2 0 R | |
| 9 | + /Type /Catalog | |
| 10 | +>> | |
| 11 | +endobj | |
| 12 | + | |
| 13 | +%% Original object ID: 2 0 | |
| 14 | +2 0 obj | |
| 15 | +<< | |
| 16 | + /Count 1 | |
| 17 | + /Kids [ | |
| 18 | + 3 0 R | |
| 19 | + ] | |
| 20 | + /Type /Pages | |
| 21 | +>> | |
| 22 | +endobj | |
| 23 | + | |
| 24 | +%% Page 1 | |
| 25 | +%% Original object ID: 3 0 | |
| 26 | +3 0 obj | |
| 27 | +<< | |
| 28 | + /Contents 4 0 R | |
| 29 | + /MediaBox [ | |
| 30 | + 0 | |
| 31 | + 0 | |
| 32 | + 612 | |
| 33 | + 792 | |
| 34 | + ] | |
| 35 | + /Parent 2 0 R | |
| 36 | + /Resources << | |
| 37 | + /Font << | |
| 38 | + /F1 6 0 R | |
| 39 | + >> | |
| 40 | + /ProcSet 7 0 R | |
| 41 | + >> | |
| 42 | + /Type /Page | |
| 43 | +>> | |
| 44 | +endobj | |
| 45 | + | |
| 46 | +%% Contents for page 1 | |
| 47 | +%% Original object ID: 4 0 | |
| 48 | +4 0 obj | |
| 49 | +<< | |
| 50 | + /Length 5 0 R | |
| 51 | +>> | |
| 52 | +stream | |
| 53 | +BT | |
| 54 | + /F1 24 Tf | |
| 55 | + 72 720 Td | |
| 56 | + (Potato) Tj | |
| 57 | +ET | |
| 58 | +endstream | |
| 59 | +endobj | |
| 60 | + | |
| 61 | +5 0 obj | |
| 62 | +44 | |
| 63 | +endobj | |
| 64 | + | |
| 65 | +%% Original object ID: 6 0 | |
| 66 | +6 0 obj | |
| 67 | +<< | |
| 68 | + /BaseFont /Helvetica | |
| 69 | + /Encoding /WinAnsiEncoding | |
| 70 | + /Name /F1 | |
| 71 | + /Subtype /Type1 | |
| 72 | + /Type /Font | |
| 73 | +>> | |
| 74 | +endobj | |
| 75 | + | |
| 76 | +%% Original object ID: 5 0 | |
| 77 | +7 0 obj | |
| 78 | +[ | |
| 79 | ||
| 80 | + /Text | |
| 81 | +] | |
| 82 | +endobj | |
| 83 | + | |
| 84 | +xref | |
| 85 | +0 8 | |
| 86 | +0000000000 65535 f | |
| 87 | +0000000052 00000 n | |
| 88 | +0000000133 00000 n | |
| 89 | +0000000242 00000 n | |
| 90 | +0000000484 00000 n | |
| 91 | +0000000580 00000 n | |
| 92 | +0000000629 00000 n | |
| 93 | +0000000774 00000 n | |
| 94 | +trailer << | |
| 95 | + /Root 1 0 R | |
| 96 | + /Size 8 | |
| 97 | + /ID [<2e68fbddcf3742fa64db89e66acd25d9><2e68fbddcf3742fa64db89e66acd25d9>] | |
| 98 | +>> | |
| 99 | +startxref | |
| 100 | +809 | |
| 101 | +%%EOF | ... | ... |