Commit 885b8781cccdc9d4402af58176e826a354b5ef7a

Authored by Jay Berkenbilt
1 parent 570db9b6

Allow --check to coexist with and precede other operations (fixes #42)

ChangeLog
1 1 2017-07-29 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * When passing multiple inspection arguments, run --check first,
  4 + and defer exit until after all the checks have been run. This
  5 + makes it possible to force operations such as --show-xref to be
  6 + delayed until after recovery attempts have been made. For example,
  7 + if you have a file with a syntactically valid xref table that has
  8 + some offsets that are incorrect, running qpdf --check --show-xref
  9 + on that file will first recover the xref and then dump the
  10 + recovered xref, while just running qpdf --show-xref will show the
  11 + xref table as present in the file. Fixes #42.
  12 +
3 13 * When recovering stream length, indicate the recovered length.
4 14 Fixes #44.
5 15  
... ...
qpdf/qpdf.cc
... ... @@ -1383,6 +1383,97 @@ int main(int argc, char* argv[])
1383 1383 }
1384 1384 if (outfilename == 0)
1385 1385 {
  1386 + int exit_code = 0;
  1387 + if (check)
  1388 + {
  1389 + // Code below may set okay to false but not to true.
  1390 + // We assume okay until we prove otherwise but may
  1391 + // continue to perform additional checks after finding
  1392 + // errors.
  1393 + bool okay = true;
  1394 + std::cout << "checking " << infilename << std::endl;
  1395 + try
  1396 + {
  1397 + int extension_level = pdf.getExtensionLevel();
  1398 + std::cout << "PDF Version: " << pdf.getPDFVersion();
  1399 + if (extension_level > 0)
  1400 + {
  1401 + std::cout << " extension level "
  1402 + << pdf.getExtensionLevel();
  1403 + }
  1404 + std::cout << std::endl;
  1405 + ::show_encryption(pdf);
  1406 + if (pdf.isLinearized())
  1407 + {
  1408 + std::cout << "File is linearized\n";
  1409 + if (! pdf.checkLinearization())
  1410 + {
  1411 + // any errors are reported by checkLinearization()
  1412 + okay = false;
  1413 + }
  1414 + }
  1415 + else
  1416 + {
  1417 + std::cout << "File is not linearized\n";
  1418 + }
  1419 +
  1420 + // Write the file to nowhere, uncompressing
  1421 + // streams. This causes full file traversal and
  1422 + // decoding of all streams we can decode.
  1423 + QPDFWriter w(pdf);
  1424 + Pl_Discard discard;
  1425 + w.setOutputPipeline(&discard);
  1426 + w.setStreamDataMode(qpdf_s_uncompress);
  1427 + w.write();
  1428 +
  1429 + // Parse all content streams
  1430 + std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
  1431 + DiscardContents discard_contents;
  1432 + int pageno = 0;
  1433 + for (std::vector<QPDFObjectHandle>::iterator iter =
  1434 + pages.begin();
  1435 + iter != pages.end(); ++iter)
  1436 + {
  1437 + ++pageno;
  1438 + try
  1439 + {
  1440 + QPDFObjectHandle::parseContentStream(
  1441 + (*iter).getKey("/Contents"),
  1442 + &discard_contents);
  1443 + }
  1444 + catch (QPDFExc& e)
  1445 + {
  1446 + okay = false;
  1447 + std::cout << "page " << pageno << ": "
  1448 + << e.what() << std::endl;
  1449 + }
  1450 + }
  1451 + }
  1452 + catch (std::exception& e)
  1453 + {
  1454 + std::cout << e.what() << std::endl;
  1455 + okay = false;
  1456 + }
  1457 + if (okay)
  1458 + {
  1459 + if (! pdf.getWarnings().empty())
  1460 + {
  1461 + exit_code = EXIT_WARNING;
  1462 + }
  1463 + else
  1464 + {
  1465 + std::cout << "No syntax or stream encoding errors"
  1466 + << " found; the file may still contain"
  1467 + << std::endl
  1468 + << "errors that qpdf cannot detect"
  1469 + << std::endl;
  1470 + }
  1471 + }
  1472 + else
  1473 + {
  1474 + exit_code = EXIT_ERROR;
  1475 + }
  1476 + }
1386 1477 if (show_npages)
1387 1478 {
1388 1479 QTC::TC("qpdf", "qpdf npages");
... ... @@ -1402,7 +1493,7 @@ int main(int argc, char* argv[])
1402 1493 }
1403 1494 else
1404 1495 {
1405   - exit(EXIT_ERROR);
  1496 + exit_code = EXIT_ERROR;
1406 1497 }
1407 1498 }
1408 1499 if (show_linearization)
... ... @@ -1435,7 +1526,7 @@ int main(int argc, char* argv[])
1435 1526 QTC::TC("qpdf", "qpdf unable to filter");
1436 1527 std::cerr << "Unable to filter stream data."
1437 1528 << std::endl;
1438   - exit(EXIT_ERROR);
  1529 + exit_code = EXIT_ERROR;
1439 1530 }
1440 1531 else
1441 1532 {
... ... @@ -1512,96 +1603,10 @@ int main(int argc, char* argv[])
1512 1603 }
1513 1604 }
1514 1605 }
1515   - if (check)
1516   - {
1517   - // Code below may set okay to false but not to true.
1518   - // We assume okay until we prove otherwise but may
1519   - // continue to perform additional checks after finding
1520   - // errors.
1521   - bool okay = true;
1522   - std::cout << "checking " << infilename << std::endl;
1523   - try
1524   - {
1525   - int extension_level = pdf.getExtensionLevel();
1526   - std::cout << "PDF Version: " << pdf.getPDFVersion();
1527   - if (extension_level > 0)
1528   - {
1529   - std::cout << " extension level "
1530   - << pdf.getExtensionLevel();
1531   - }
1532   - std::cout << std::endl;
1533   - ::show_encryption(pdf);
1534   - if (pdf.isLinearized())
1535   - {
1536   - std::cout << "File is linearized\n";
1537   - if (! pdf.checkLinearization())
1538   - {
1539   - // any errors are reported by checkLinearization()
1540   - okay = false;
1541   - }
1542   - }
1543   - else
1544   - {
1545   - std::cout << "File is not linearized\n";
1546   - }
1547   -
1548   - // Write the file no nowhere, uncompressing
1549   - // streams. This causes full file traversal and
1550   - // decoding of all streams we can decode.
1551   - QPDFWriter w(pdf);
1552   - Pl_Discard discard;
1553   - w.setOutputPipeline(&discard);
1554   - w.setStreamDataMode(qpdf_s_uncompress);
1555   - w.write();
1556   -
1557   - // Parse all content streams
1558   - std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
1559   - DiscardContents discard_contents;
1560   - int pageno = 0;
1561   - for (std::vector<QPDFObjectHandle>::iterator iter =
1562   - pages.begin();
1563   - iter != pages.end(); ++iter)
1564   - {
1565   - ++pageno;
1566   - try
1567   - {
1568   - QPDFObjectHandle::parseContentStream(
1569   - (*iter).getKey("/Contents"),
1570   - &discard_contents);
1571   - }
1572   - catch (QPDFExc& e)
1573   - {
1574   - okay = false;
1575   - std::cout << "page " << pageno << ": "
1576   - << e.what() << std::endl;
1577   - }
1578   - }
1579   - }
1580   - catch (std::exception& e)
1581   - {
1582   - std::cout << e.what() << std::endl;
1583   - okay = false;
1584   - }
1585   - if (okay)
1586   - {
1587   - if (! pdf.getWarnings().empty())
1588   - {
1589   - exit(EXIT_WARNING);
1590   - }
1591   - else
1592   - {
1593   - std::cout << "No syntax or stream encoding errors"
1594   - << " found; the file may still contain"
1595   - << std::endl
1596   - << "errors that qpdf cannot detect"
1597   - << std::endl;
1598   - }
1599   - }
1600   - else
1601   - {
1602   - exit(EXIT_ERROR);
1603   - }
1604   - }
  1606 + if (exit_code)
  1607 + {
  1608 + exit(exit_code);
  1609 + }
1605 1610 }
1606 1611 else
1607 1612 {
... ...
qpdf/qtest/qpdf.test
... ... @@ -206,7 +206,7 @@ $td->runtest("remove page we don't have",
206 206 show_ntests();
207 207 # ----------
208 208 $td->notify("--- Miscellaneous Tests ---");
209   -$n_tests += 91;
  209 +$n_tests += 93;
210 210  
211 211 $td->runtest("qpdf version",
212 212 {$td->COMMAND => "qpdf --version"},
... ... @@ -628,6 +628,19 @@ $td->runtest("check output",
628 628 {$td->FILE => "a.pdf"},
629 629 {$td->FILE => "newline-before-endstream.pdf"});
630 630  
  631 +# Demonstrate show-xref after check and not after check to illustrate
  632 +# that it can dump the real xref or the recovered xref.
  633 +$td->runtest("dump bad xref",
  634 + {$td->COMMAND => "qpdf --show-xref bad-xref-entry.pdf"},
  635 + {$td->FILE => "bad-xref-entry.out",
  636 + $td->EXIT_STATUS => 0},
  637 + $td->NORMALIZE_NEWLINES);
  638 +$td->runtest("dump corrected bad xref",
  639 + {$td->COMMAND => "qpdf --check --show-xref bad-xref-entry.pdf"},
  640 + {$td->FILE => "bad-xref-entry-corrected.out",
  641 + $td->EXIT_STATUS => 3},
  642 + $td->NORMALIZE_NEWLINES);
  643 +
631 644  
632 645 show_ntests();
633 646 # ----------
... ...
qpdf/qtest/qpdf/bad-xref-entry-corrected.out 0 → 100644
  1 +checking bad-xref-entry.pdf
  2 +PDF Version: 1.3
  3 +File is not encrypted
  4 +File is not linearized
  5 +WARNING: bad-xref-entry.pdf: file is damaged
  6 +WARNING: bad-xref-entry.pdf (object 5 0, file position 580): expected n n obj
  7 +WARNING: bad-xref-entry.pdf: Attempting to reconstruct cross-reference table
  8 +1/0: uncompressed; offset = 52
  9 +2/0: uncompressed; offset = 133
  10 +3/0: uncompressed; offset = 242
  11 +4/0: uncompressed; offset = 484
  12 +5/0: uncompressed; offset = 583
  13 +6/0: uncompressed; offset = 629
  14 +7/0: uncompressed; offset = 774
... ...
qpdf/qtest/qpdf/bad-xref-entry.out 0 → 100644
  1 +1/0: uncompressed; offset = 52
  2 +2/0: uncompressed; offset = 133
  3 +3/0: uncompressed; offset = 242
  4 +4/0: uncompressed; offset = 484
  5 +5/0: uncompressed; offset = 580
  6 +6/0: uncompressed; offset = 629
  7 +7/0: uncompressed; offset = 774
... ...
qpdf/qtest/qpdf/bad-xref-entry.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +%% Original object ID: 1 0
  6 +1 0 obj
  7 +<<
  8 + /Pages 2 0 R
  9 + /Type /Catalog
  10 +>>
  11 +endobj
  12 +
  13 +%% Original object ID: 2 0
  14 +2 0 obj
  15 +<<
  16 + /Count 1
  17 + /Kids [
  18 + 3 0 R
  19 + ]
  20 + /Type /Pages
  21 +>>
  22 +endobj
  23 +
  24 +%% Page 1
  25 +%% Original object ID: 3 0
  26 +3 0 obj
  27 +<<
  28 + /Contents 4 0 R
  29 + /MediaBox [
  30 + 0
  31 + 0
  32 + 612
  33 + 792
  34 + ]
  35 + /Parent 2 0 R
  36 + /Resources <<
  37 + /Font <<
  38 + /F1 6 0 R
  39 + >>
  40 + /ProcSet 7 0 R
  41 + >>
  42 + /Type /Page
  43 +>>
  44 +endobj
  45 +
  46 +%% Contents for page 1
  47 +%% Original object ID: 4 0
  48 +4 0 obj
  49 +<<
  50 + /Length 5 0 R
  51 +>>
  52 +stream
  53 +BT
  54 + /F1 24 Tf
  55 + 72 720 Td
  56 + (Potato) Tj
  57 +ET
  58 +endstream
  59 +endobj
  60 +
  61 +5 0 obj
  62 +44
  63 +endobj
  64 +
  65 +%% Original object ID: 6 0
  66 +6 0 obj
  67 +<<
  68 + /BaseFont /Helvetica
  69 + /Encoding /WinAnsiEncoding
  70 + /Name /F1
  71 + /Subtype /Type1
  72 + /Type /Font
  73 +>>
  74 +endobj
  75 +
  76 +%% Original object ID: 5 0
  77 +7 0 obj
  78 +[
  79 + /PDF
  80 + /Text
  81 +]
  82 +endobj
  83 +
  84 +xref
  85 +0 8
  86 +0000000000 65535 f
  87 +0000000052 00000 n
  88 +0000000133 00000 n
  89 +0000000242 00000 n
  90 +0000000484 00000 n
  91 +0000000580 00000 n
  92 +0000000629 00000 n
  93 +0000000774 00000 n
  94 +trailer <<
  95 + /Root 1 0 R
  96 + /Size 8
  97 + /ID [<2e68fbddcf3742fa64db89e66acd25d9><2e68fbddcf3742fa64db89e66acd25d9>]
  98 +>>
  99 +startxref
  100 +809
  101 +%%EOF
... ...