Allow --check to coexist with and precede other operations (fixes #42)

Jay Berkenbilt
1 parent 570db9b6
Showing 6 changed files with 243 additions and 93 deletions
ChangeLog
qpdf/qpdf.cc
qpdf/qtest/qpdf.test
qpdf/qtest/qpdf/bad-xref-entry-corrected.out
qpdf/qtest/qpdf/bad-xref-entry.out
qpdf/qtest/qpdf/bad-xref-entry.pdf
 2017-07-29  Jay Berkenbilt  <ejb@ql.org>
  
+	* When passing multiple inspection arguments, run --check first,
+	and defer exit until after all the checks have been run. This
+	makes it possible to force operations such as --show-xref to be
+	delayed until after recovery attempts have been made. For example,
+	if you have a file with a syntactically valid xref table that has
+	some offsets that are incorrect, running qpdf --check --show-xref
+	on that file will first recover the xref and then dump the
+	recovered xref, while just running qpdf --show-xref will show the
+	xref table as present in the file. Fixes #42.
+
 	* When recovering stream length, indicate the recovered length.
 	Fixes #44.
  
@@ -1383,6 +1383,97 @@ int main(int argc, char* argv[])
         }
 	if (outfilename == 0)
 	{
+            int exit_code = 0;
+	    if (check)
+	    {
+                // Code below may set okay to false but not to true.
+                // We assume okay until we prove otherwise but may
+                // continue to perform additional checks after finding
+                // errors.
+		bool okay = true;
+		std::cout << "checking " << infilename << std::endl;
+		try
+		{
+                    int extension_level = pdf.getExtensionLevel();
+		    std::cout << "PDF Version: " << pdf.getPDFVersion();
+                    if (extension_level > 0)
+                    {
+                        std::cout << " extension level "
+                                  << pdf.getExtensionLevel();
+                    }
+                    std::cout << std::endl;
+		    ::show_encryption(pdf);
+		    if (pdf.isLinearized())
+		    {
+			std::cout << "File is linearized\n";
+			if (! pdf.checkLinearization())
+                        {
+                            // any errors are reported by checkLinearization()
+                            okay = false;
+                        }
+		    }
+		    else
+		    {
+			std::cout << "File is not linearized\n";
+                    }
+
+                    // Write the file to nowhere, uncompressing
+                    // streams.  This causes full file traversal and
+                    // decoding of all streams we can decode.
+                    QPDFWriter w(pdf);
+                    Pl_Discard discard;
+                    w.setOutputPipeline(&discard);
+                    w.setStreamDataMode(qpdf_s_uncompress);
+                    w.write();
+
+                    // Parse all content streams
+                    std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
+                    DiscardContents discard_contents;
+                    int pageno = 0;
+                    for (std::vector<QPDFObjectHandle>::iterator iter =
+                             pages.begin();
+                         iter != pages.end(); ++iter)
+                    {
+                        ++pageno;
+                        try
+                        {
+                            QPDFObjectHandle::parseContentStream(
+                                (*iter).getKey("/Contents"),
+                                &discard_contents);
+                        }
+                        catch (QPDFExc& e)
+                        {
+                            okay = false;
+                            std::cout << "page " << pageno << ": "
+                                      << e.what() << std::endl;
+                        }
+                    }
+		}
+		catch (std::exception& e)
+		{
+		    std::cout << e.what() << std::endl;
+                    okay = false;
+		}
+		if (okay)
+		{
+		    if (! pdf.getWarnings().empty())
+		    {
+			exit_code = EXIT_WARNING;
+		    }
+		    else
+		    {
+			std::cout << "No syntax or stream encoding errors"
+				  << " found; the file may still contain"
+				  << std::endl
+				  << "errors that qpdf cannot detect"
+				  << std::endl;
+		    }
+		}
+                else
+                {
+                    exit_code = EXIT_ERROR;
+                }
+	    }
             if (show_npages)
             {
                 QTC::TC("qpdf", "qpdf npages");
@@ -1402,7 +1493,7 @@ int main(int argc, char* argv[])
 		}
 		else
 		{
-		    exit(EXIT_ERROR);
+		    exit_code = EXIT_ERROR;
 		}
 	    }
 	    if (show_linearization)
@@ -1435,7 +1526,7 @@ int main(int argc, char* argv[])
 			    QTC::TC("qpdf", "qpdf unable to filter");
 			    std::cerr << "Unable to filter stream data."
 				      << std::endl;
-			    exit(EXIT_ERROR);
+			    exit_code = EXIT_ERROR;
 			}
 			else
 			{
@@ -1512,96 +1603,10 @@ int main(int argc, char* argv[])
 		    }
 		}
 	    }
-	    if (check)
-	    {
-                // Code below may set okay to false but not to true.
-                // We assume okay until we prove otherwise but may
-                // continue to perform additional checks after finding
-                // errors.
-		bool okay = true;
-		std::cout << "checking " << infilename << std::endl;
-		try
-		{
-                    int extension_level = pdf.getExtensionLevel();
-		    std::cout << "PDF Version: " << pdf.getPDFVersion();
-                    if (extension_level > 0)
-                    {
-                        std::cout << " extension level "
-                                  << pdf.getExtensionLevel();
-                    }
-                    std::cout << std::endl;
-		    ::show_encryption(pdf);
-		    if (pdf.isLinearized())
-		    {
-			std::cout << "File is linearized\n";
-			if (! pdf.checkLinearization())
-                        {
-                            // any errors are reported by checkLinearization()
-                            okay = false;
-                        }
-		    }
-		    else
-		    {
-			std::cout << "File is not linearized\n";
-                    }
-
-                    // Write the file no nowhere, uncompressing
-                    // streams.  This causes full file traversal and
-                    // decoding of all streams we can decode.
-                    QPDFWriter w(pdf);
-                    Pl_Discard discard;
-                    w.setOutputPipeline(&discard);
-                    w.setStreamDataMode(qpdf_s_uncompress);
-                    w.write();
-
-                    // Parse all content streams
-                    std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
-                    DiscardContents discard_contents;
-                    int pageno = 0;
-                    for (std::vector<QPDFObjectHandle>::iterator iter =
-                             pages.begin();
-                         iter != pages.end(); ++iter)
-                    {
-                        ++pageno;
-                        try
-                        {
-                            QPDFObjectHandle::parseContentStream(
-                                (*iter).getKey("/Contents"),
-                                &discard_contents);
-                        }
-                        catch (QPDFExc& e)
-                        {
-                            okay = false;
-                            std::cout << "page " << pageno << ": "
-                                      << e.what() << std::endl;
-                        }
-                    }
-		}
-		catch (std::exception& e)
-		{
-		    std::cout << e.what() << std::endl;
-                    okay = false;
-		}
-		if (okay)
-		{
-		    if (! pdf.getWarnings().empty())
-		    {
-			exit(EXIT_WARNING);
-		    }
-		    else
-		    {
-			std::cout << "No syntax or stream encoding errors"
-				  << " found; the file may still contain"
-				  << std::endl
-				  << "errors that qpdf cannot detect"
-				  << std::endl;
-		    }
-		}
-                else
-                {
-                    exit(EXIT_ERROR);
-                }
-	    }
+            if (exit_code)
+            {
+                exit(exit_code);
+            }
 	}
 	else
 	{
@@ -206,7 +206,7 @@ $td->runtest("remove page we don't have",
 show_ntests();
 # ----------
 $td->notify("--- Miscellaneous Tests ---");
-$n_tests += 91;
+$n_tests += 93;
  
 $td->runtest("qpdf version",
 	     {$td->COMMAND => "qpdf --version"},
@@ -628,6 +628,19 @@ $td->runtest("check output",
              {$td->FILE => "a.pdf"},
              {$td->FILE => "newline-before-endstream.pdf"});
  
+# Demonstrate show-xref after check and not after check to illustrate
+# that it can dump the real xref or the recovered xref.
+$td->runtest("dump bad xref",
+             {$td->COMMAND => "qpdf --show-xref bad-xref-entry.pdf"},
+             {$td->FILE => "bad-xref-entry.out",
+              $td->EXIT_STATUS => 0},
+             $td->NORMALIZE_NEWLINES);
+$td->runtest("dump corrected bad xref",
+             {$td->COMMAND => "qpdf --check --show-xref bad-xref-entry.pdf"},
+             {$td->FILE => "bad-xref-entry-corrected.out",
+              $td->EXIT_STATUS => 3},
+             $td->NORMALIZE_NEWLINES);
+
  
 show_ntests();
 # ----------
+checking bad-xref-entry.pdf
+PDF Version: 1.3
+File is not encrypted
+File is not linearized
+WARNING: bad-xref-entry.pdf: file is damaged
+WARNING: bad-xref-entry.pdf (object 5 0, file position 580): expected n n obj
+WARNING: bad-xref-entry.pdf: Attempting to reconstruct cross-reference table
+1/0: uncompressed; offset = 52
+2/0: uncompressed; offset = 133
+3/0: uncompressed; offset = 242
+4/0: uncompressed; offset = 484
+5/0: uncompressed; offset = 583
+6/0: uncompressed; offset = 629
+7/0: uncompressed; offset = 774
+1/0: uncompressed; offset = 52
+2/0: uncompressed; offset = 133
+3/0: uncompressed; offset = 242
+4/0: uncompressed; offset = 484
+5/0: uncompressed; offset = 580
+6/0: uncompressed; offset = 629
+7/0: uncompressed; offset = 774
+%PDF-1.3
+%¿÷¢þ
+%QDF-1.0
+
+%% Original object ID: 1 0
+1 0 obj
+<<
+  /Pages 2 0 R
+  /Type /Catalog
+>>
+endobj
+
+%% Original object ID: 2 0
+2 0 obj
+<<
+  /Count 1
+  /Kids [
+    3 0 R
+  ]
+  /Type /Pages
+>>
+endobj
+
+%% Page 1
+%% Original object ID: 3 0
+3 0 obj
+<<
+  /Contents 4 0 R
+  /MediaBox [
+    0
+    0
+    612
+    792
+  ]
+  /Parent 2 0 R
+  /Resources <<
+    /Font <<
+      /F1 6 0 R
+    >>
+    /ProcSet 7 0 R
+  >>
+  /Type /Page
+>>
+endobj
+
+%% Contents for page 1
+%% Original object ID: 4 0
+4 0 obj
+<<
+  /Length 5 0 R
+>>
+stream
+BT
+  /F1 24 Tf
+  72 720 Td
+  (Potato) Tj
+ET
+endstream
+endobj
+
+5 0 obj
+44
+endobj
+
+%% Original object ID: 6 0
+6 0 obj
+<<
+  /BaseFont /Helvetica
+  /Encoding /WinAnsiEncoding
+  /Name /F1
+  /Subtype /Type1
+  /Type /Font
+>>
+endobj
+
+%% Original object ID: 5 0
+7 0 obj
+[
+  /PDF
+  /Text
+]
+endobj
+
+xref
+0 8
+0000000000 65535 f 
+0000000052 00000 n 
+0000000133 00000 n 
+0000000242 00000 n 
+0000000484 00000 n 
+0000000580 00000 n 
+0000000629 00000 n 
+0000000774 00000 n 
+trailer <<
+  /Root 1 0 R
+  /Size 8
+  /ID [<2e68fbddcf3742fa64db89e66acd25d9><2e68fbddcf3742fa64db89e66acd25d9>]
+>>
+startxref
+809
+%%EOF