Commit 68e721981a1fe4f1398d811bb8ed99af0e100da8

Authored by Jay Berkenbilt
1 parent 696ca532

Add new QPDF::warn that takes most of QPDFExc's arguments

ChangeLog
1 1 2022-04-23 Jay Berkenbilt <ejb@ql.org>
2 2  
  3 + * Add a new QPDF::warn method that takes the parameters of
  4 + QPDFExc's constructor except for the filename, which is taken from
  5 + the QPDF object. This is a shorter way to issue warnings on behalf
  6 + of a QPDF object.
  7 +
3 8 * Add new method QUtil::is_explicit_utf8 that tests whether a
4 9 string is explicitly marked as being UTF-8 encoded, as allowed by
5 10 the PDF 2.0 spec. Such a string starts with the bytes 0xEF 0xBB
... ...
... ... @@ -21,8 +21,6 @@ Misc
21 21 --show-encryption could potentially retry with this option if the
22 22 first time doesn't work. Then, with the file open, we can read the
23 23 encryption dictionary normally.
24   -* Have a warn in QPDF that passes its variable arguments onto QPDFExc
25   - so you don't have to do warn(QPDFExc(...))
26 24 * Find all places in the code that write to std::cout, std::cerr,
27 25 stdout, or stderr to make sure they obey default output stream
28 26 settings for QPDF and QPDFJob. This probably includes adding a
... ...
include/qpdf/QPDF.hh
... ... @@ -262,6 +262,16 @@ class QPDF
262 262 // rules, and it will be available with getWarnings().
263 263 QPDF_DLL
264 264 void warn(QPDFExc const& e);
  265 + // Same as above but creates the QPDFExc object using the
  266 + // arguments passed to warn. The filename argument to QPDFExc is
  267 + // omitted. This method uses the filename associated with the QPDF
  268 + // object.
  269 + QPDF_DLL
  270 + void warn(
  271 + qpdf_error_code_e error_code,
  272 + std::string const& object,
  273 + qpdf_offset_t offset,
  274 + std::string const& message);
265 275  
266 276 QPDF_DLL
267 277 std::string getFilename() const;
... ...
libqpdf/NNTree.cc
... ... @@ -18,14 +18,7 @@ get_description(QPDFObjectHandle& node)
18 18 static void
19 19 warn(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg)
20 20 {
21   - qpdf.warn(
22   - // line-break
23   - QPDFExc(
24   - qpdf_e_damaged_pdf,
25   - qpdf.getFilename(),
26   - get_description(node),
27   - 0,
28   - msg));
  21 + qpdf.warn(qpdf_e_damaged_pdf, get_description(node), 0, msg);
29 22 }
30 23  
31 24 static void
... ...
libqpdf/QPDF.cc
... ... @@ -459,12 +459,7 @@ QPDF::parse(char const* password)
459 459 PatternFinder hf(*this, &QPDF::findHeader);
460 460 if (!this->m->file->findFirst("%PDF-", 0, 1024, hf)) {
461 461 QTC::TC("qpdf", "QPDF not a pdf file");
462   - warn(QPDFExc(
463   - qpdf_e_damaged_pdf,
464   - this->m->file->getName(),
465   - "",
466   - 0,
467   - "can't find PDF header"));
  462 + warn(qpdf_e_damaged_pdf, "", 0, "can't find PDF header");
468 463 // QPDFWriter writes files that usually require at least
469 464 // version 1.2 for /FlateDecode
470 465 this->m->pdf_version = "1.2";
... ... @@ -542,6 +537,16 @@ QPDF::warn(QPDFExc const& e)
542 537 }
543 538  
544 539 void
  540 +QPDF::warn(
  541 + qpdf_error_code_e error_code,
  542 + std::string const& object,
  543 + qpdf_offset_t offset,
  544 + std::string const& message)
  545 +{
  546 + warn(QPDFExc(error_code, this->getFilename(), object, offset, message));
  547 +}
  548 +
  549 +void
545 550 QPDF::setTrailer(QPDFObjectHandle obj)
546 551 {
547 552 if (this->m->trailer.isInitialized()) {
... ... @@ -562,19 +567,13 @@ QPDF::reconstruct_xref(QPDFExc& e)
562 567  
563 568 this->m->reconstructed_xref = true;
564 569  
565   - warn(QPDFExc(
566   - qpdf_e_damaged_pdf,
567   - this->m->file->getName(),
568   - "",
569   - 0,
570   - "file is damaged"));
  570 + warn(qpdf_e_damaged_pdf, "", 0, "file is damaged");
571 571 warn(e);
572   - warn(QPDFExc(
  572 + warn(
573 573 qpdf_e_damaged_pdf,
574   - this->m->file->getName(),
575 574 "",
576 575 0,
577   - "Attempting to reconstruct cross-reference table"));
  576 + "Attempting to reconstruct cross-reference table");
578 577  
579 578 // Delete all references to type 1 (uncompressed) objects
580 579 std::set<QPDFObjGen> to_delete;
... ... @@ -700,12 +699,11 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
700 699 if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) {
701 700 if (skipped_space) {
702 701 QTC::TC("qpdf", "QPDF xref skipped space");
703   - warn(QPDFExc(
  702 + warn(
704 703 qpdf_e_damaged_pdf,
705   - this->m->file->getName(),
706 704 "",
707 705 0,
708   - "extraneous whitespace seen before xref"));
  706 + "extraneous whitespace seen before xref");
709 707 }
710 708 QTC::TC(
711 709 "qpdf",
... ... @@ -753,15 +751,14 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
753 751 }
754 752 if ((size < 1) || (size - 1 != max_obj)) {
755 753 QTC::TC("qpdf", "QPDF xref size mismatch");
756   - warn(QPDFExc(
  754 + warn(
757 755 qpdf_e_damaged_pdf,
758   - this->m->file->getName(),
759 756 "",
760 757 0,
761 758 (std::string("reported number of objects (") +
762 759 QUtil::int_to_string(size) +
763 760 ") is not one plus the highest object number (" +
764   - QUtil::int_to_string(max_obj) + ")")));
  761 + QUtil::int_to_string(max_obj) + ")"));
765 762 }
766 763  
767 764 // We no longer need the deleted_objects table, so go ahead and
... ... @@ -885,12 +882,11 @@ QPDF::parse_xrefEntry(
885 882 }
886 883  
887 884 if (invalid) {
888   - warn(QPDFExc(
  885 + warn(
889 886 qpdf_e_damaged_pdf,
890   - this->m->file->getName(),
891 887 "xref table",
892 888 this->m->file->getLastOffset(),
893   - "accepting invalid xref table entry"));
  889 + "accepting invalid xref table entry");
894 890 }
895 891  
896 892 f1 = QUtil::string_to_ll(f1_str.c_str());
... ... @@ -1813,12 +1809,11 @@ QPDF::readObjectAtOffset(
1813 1809 // ignore these.
1814 1810 if (offset == 0) {
1815 1811 QTC::TC("qpdf", "QPDF bogus 0 offset", 0);
1816   - warn(QPDFExc(
  1812 + warn(
1817 1813 qpdf_e_damaged_pdf,
1818   - this->m->file->getName(),
1819 1814 this->m->last_object_description,
1820 1815 0,
1821   - "object has offset 0"));
  1816 + "object has offset 0");
1822 1817 return QPDFObjectHandle::newNull();
1823 1818 }
1824 1819  
... ... @@ -1898,16 +1893,15 @@ QPDF::readObjectAtOffset(
1898 1893 return result;
1899 1894 } else {
1900 1895 QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
1901   - warn(QPDFExc(
  1896 + warn(
1902 1897 qpdf_e_damaged_pdf,
1903   - this->m->file->getName(),
1904 1898 "",
1905 1899 0,
1906 1900 std::string(
1907 1901 "object " + QUtil::int_to_string(exp_objid) + " " +
1908 1902 QUtil::int_to_string(exp_generation) +
1909 1903 " not found in file after regenerating"
1910   - " cross reference table")));
  1904 + " cross reference table"));
1911 1905 return QPDFObjectHandle::newNull();
1912 1906 }
1913 1907 } else {
... ... @@ -1921,12 +1915,11 @@ QPDF::readObjectAtOffset(
1921 1915 if (!(readToken(this->m->file) ==
1922 1916 QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))) {
1923 1917 QTC::TC("qpdf", "QPDF err expected endobj");
1924   - warn(QPDFExc(
  1918 + warn(
1925 1919 qpdf_e_damaged_pdf,
1926   - this->m->file->getName(),
1927 1920 this->m->last_object_description,
1928 1921 this->m->file->getLastOffset(),
1929   - "expected endobj"));
  1922 + "expected endobj");
1930 1923 }
1931 1924  
1932 1925 QPDFObjGen og(objid, generation);
... ... @@ -2005,13 +1998,12 @@ QPDF::resolve(int objid, int generation)
2005 1998 // indirectly in some key that has to be resolved during
2006 1999 // object parsing, such as stream length.
2007 2000 QTC::TC("qpdf", "QPDF recursion loop in resolve");
2008   - warn(QPDFExc(
  2001 + warn(
2009 2002 qpdf_e_damaged_pdf,
2010   - this->m->file->getName(),
2011 2003 "",
2012 2004 this->m->file->getLastOffset(),
2013 2005 ("loop detected resolving object " + QUtil::int_to_string(objid) +
2014   - " " + QUtil::int_to_string(generation))));
  2006 + " " + QUtil::int_to_string(generation)));
2015 2007 return std::shared_ptr<QPDFObject>(new QPDF_Null);
2016 2008 }
2017 2009 ResolveRecorder rr(this, og);
... ... @@ -2054,14 +2046,13 @@ QPDF::resolve(int objid, int generation)
2054 2046 } catch (QPDFExc& e) {
2055 2047 warn(e);
2056 2048 } catch (std::exception& e) {
2057   - warn(QPDFExc(
  2049 + warn(
2058 2050 qpdf_e_damaged_pdf,
2059   - this->m->file->getName(),
2060 2051 "",
2061 2052 0,
2062 2053 ("object " + QUtil::int_to_string(objid) + "/" +
2063 2054 QUtil::int_to_string(generation) +
2064   - ": error reading object: " + e.what())));
  2055 + ": error reading object: " + e.what()));
2065 2056 }
2066 2057 }
2067 2058 if (this->m->obj_cache.count(og) == 0) {
... ... @@ -2112,13 +2103,12 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2112 2103 QPDFObjectHandle dict = obj_stream.getDict();
2113 2104 if (!dict.isDictionaryOfType("/ObjStm")) {
2114 2105 QTC::TC("qpdf", "QPDF ERR object stream with wrong type");
2115   - warn(QPDFExc(
  2106 + warn(
2116 2107 qpdf_e_damaged_pdf,
2117   - this->m->file->getName(),
2118 2108 this->m->last_object_description,
2119 2109 this->m->file->getLastOffset(),
2120 2110 ("supposed object stream " +
2121   - QUtil::int_to_string(obj_stream_number) + " has wrong type")));
  2111 + QUtil::int_to_string(obj_stream_number) + " has wrong type"));
2122 2112 }
2123 2113  
2124 2114 if (!(dict.getKey("/N").isInteger() && dict.getKey("/First").isInteger())) {
... ...
libqpdf/QPDFJob.cc
... ... @@ -2677,14 +2677,13 @@ QPDFJob::handlePageSpecs(
2677 2677 *other_afdh,
2678 2678 &referenced_fields);
2679 2679 } catch (std::exception& e) {
2680   - pdf.warn(QPDFExc(
  2680 + pdf.warn(
2681 2681 qpdf_e_damaged_pdf,
2682   - pdf.getFilename(),
2683 2682 "",
2684 2683 0,
2685 2684 ("Exception caught while fixing copied"
2686 2685 " annotations. This may be a qpdf bug. " +
2687   - std::string("Exception: ") + e.what())));
  2686 + std::string("Exception: ") + e.what()));
2688 2687 }
2689 2688 }
2690 2689 }
... ... @@ -3127,14 +3126,13 @@ QPDFJob::doSplitPages(QPDF& pdf, bool& warnings)
3127 3126 try {
3128 3127 out_afdh->fixCopiedAnnotations(new_page, page, afdh);
3129 3128 } catch (std::exception& e) {
3130   - pdf.warn(QPDFExc(
  3129 + pdf.warn(
3131 3130 qpdf_e_damaged_pdf,
3132   - pdf.getFilename(),
3133 3131 "",
3134 3132 0,
3135   - "Exception caught while fixing copied"
3136   - " annotations. This may be a qpdf bug." +
3137   - std::string("Exception: ") + e.what()));
  3133 + ("Exception caught while fixing copied"
  3134 + " annotations. This may be a qpdf bug." +
  3135 + std::string("Exception: ") + e.what()));
3138 3136 }
3139 3137 }
3140 3138 }
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -1556,8 +1556,9 @@ QPDFObjectHandle::arrayOrStreamToStreamArray(
1556 1556 item.getOwningQPDF(),
1557 1557 QPDFExc(
1558 1558 qpdf_e_damaged_pdf,
1559   - description,
1560   - "item index " + QUtil::int_to_string(i) + " (from 0)",
  1559 + "",
  1560 + description + ": item index " +
  1561 + QUtil::int_to_string(i) + " (from 0)",
1561 1562 0,
1562 1563 "ignoring non-stream in an array of streams"));
1563 1564 }
... ...
libqpdf/QPDF_Stream.cc
... ... @@ -302,12 +302,10 @@ QPDF_Stream::filterable(
302 302  
303 303 if (!filters_okay) {
304 304 QTC::TC("qpdf", "QPDF_Stream invalid filter");
305   - warn(QPDFExc(
  305 + warn(
306 306 qpdf_e_damaged_pdf,
307   - qpdf->getFilename(),
308   - "",
309 307 this->offset,
310   - "stream filter type is not name or array"));
  308 + "stream filter type is not name or array");
311 309 return false;
312 310 }
313 311  
... ... @@ -355,13 +353,11 @@ QPDF_Stream::filterable(
355 353 // one case of a file whose /DecodeParms was [ << >> ] when
356 354 // /Filters was empty has been seen in the wild.
357 355 if ((filters.size() != 0) && (decode_parms.size() != filters.size())) {
358   - warn(QPDFExc(
  356 + warn(
359 357 qpdf_e_damaged_pdf,
360   - qpdf->getFilename(),
361   - "",
362 358 this->offset,
363 359 "stream /DecodeParms length is"
364   - " inconsistent with filters"));
  360 + " inconsistent with filters");
365 361 filterable = false;
366 362 }
367 363  
... ... @@ -474,12 +470,7 @@ QPDF_Stream::pipeStreamData(
474 470 Pl_Flate* flate = dynamic_cast<Pl_Flate*>(pipeline);
475 471 if (flate != nullptr) {
476 472 flate->setWarnCallback([this](char const* msg, int code) {
477   - warn(QPDFExc(
478   - qpdf_e_damaged_pdf,
479   - qpdf->getFilename(),
480   - "",
481   - this->offset,
482   - msg));
  473 + warn(qpdf_e_damaged_pdf, this->offset, msg);
483 474 });
484 475 }
485 476 }
... ... @@ -551,34 +542,28 @@ QPDF_Stream::pipeStreamData(
551 542  
552 543 if (filter && (!suppress_warnings) && normalizer.get() &&
553 544 normalizer->anyBadTokens()) {
554   - warn(QPDFExc(
  545 + warn(
555 546 qpdf_e_damaged_pdf,
556   - qpdf->getFilename(),
557   - "",
558 547 this->offset,
559   - "content normalization encountered bad tokens"));
  548 + "content normalization encountered bad tokens");
560 549 if (normalizer->lastTokenWasBad()) {
561 550 QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
562   - warn(QPDFExc(
  551 + warn(
563 552 qpdf_e_damaged_pdf,
564   - qpdf->getFilename(),
565   - "",
566 553 this->offset,
567 554 "normalized content ended with a bad token;"
568 555 " you may be able to resolve this by"
569 556 " coalescing content streams in combination"
570 557 " with normalizing content. From the command"
571   - " line, specify --coalesce-contents"));
  558 + " line, specify --coalesce-contents");
572 559 }
573   - warn(QPDFExc(
  560 + warn(
574 561 qpdf_e_damaged_pdf,
575   - qpdf->getFilename(),
576   - "",
577 562 this->offset,
578 563 "Resulting stream data may be corrupted but is"
579 564 " may still useful for manual inspection."
580 565 " For more information on this warning, search"
581   - " for content normalization in the manual."));
  566 + " for content normalization in the manual.");
582 567 }
583 568  
584 569 return success;
... ... @@ -645,7 +630,10 @@ QPDF_Stream::replaceDict(QPDFObjectHandle new_dict)
645 630 }
646 631  
647 632 void
648   -QPDF_Stream::warn(QPDFExc const& e)
  633 +QPDF_Stream::warn(
  634 + qpdf_error_code_e error_code,
  635 + qpdf_offset_t offset,
  636 + std::string const& message)
649 637 {
650   - this->qpdf->warn(e);
  638 + this->qpdf->warn(error_code, "", offset, message);
651 639 }
... ...
libqpdf/QPDF_encryption.cc
... ... @@ -803,12 +803,11 @@ QPDF::initializeEncryption()
803 803 // Treating a missing ID as the empty string enables qpdf to
804 804 // decrypt some invalid encrypted files with no /ID that
805 805 // poppler can read but Adobe Reader can't.
806   - warn(QPDFExc(
  806 + warn(
807 807 qpdf_e_damaged_pdf,
808   - this->m->file->getName(),
809 808 "trailer",
810 809 this->m->file->getLastOffset(),
811   - "invalid /ID in trailer dictionary"));
  810 + "invalid /ID in trailer dictionary");
812 811 }
813 812  
814 813 QPDFObjectHandle encryption_dict = this->m->trailer.getKey("/Encrypt");
... ... @@ -831,13 +830,12 @@ QPDF::initializeEncryption()
831 830 "unsupported encryption filter");
832 831 }
833 832 if (!encryption_dict.getKey("/SubFilter").isNull()) {
834   - warn(QPDFExc(
  833 + warn(
835 834 qpdf_e_unsupported,
836   - this->m->file->getName(),
837 835 "encryption dictionary",
838 836 this->m->file->getLastOffset(),
839 837 "file uses encryption SubFilters,"
840   - " which qpdf does not support"));
  838 + " which qpdf does not support");
841 839 }
842 840  
843 841 if (!(encryption_dict.getKey("/V").isInteger() &&
... ... @@ -1067,13 +1065,12 @@ QPDF::initializeEncryption()
1067 1065 this->m->encp->encryption_key = recover_encryption_key_with_password(
1068 1066 this->m->encp->provided_password, data, perms_valid);
1069 1067 if (!perms_valid) {
1070   - warn(QPDFExc(
  1068 + warn(
1071 1069 qpdf_e_damaged_pdf,
1072   - this->m->file->getName(),
1073 1070 "encryption dictionary",
1074 1071 this->m->file->getLastOffset(),
1075 1072 "/Perms field in encryption dictionary"
1076   - " doesn't match expected value"));
  1073 + " doesn't match expected value");
1077 1074 }
1078 1075 }
1079 1076 }
... ... @@ -1130,14 +1127,13 @@ QPDF::decryptString(std::string& str, int objid, int generation)
1130 1127 break;
1131 1128  
1132 1129 default:
1133   - warn(QPDFExc(
  1130 + warn(
1134 1131 qpdf_e_damaged_pdf,
1135   - this->m->file->getName(),
1136 1132 this->m->last_object_description,
1137 1133 this->m->file->getLastOffset(),
1138 1134 "unknown encryption filter for strings"
1139 1135 " (check /StrF in /Encrypt dictionary);"
1140   - " strings may be decrypted improperly"));
  1136 + " strings may be decrypted improperly");
1141 1137 // To avoid repeated warnings, reset cf_string. Assume
1142 1138 // we'd want to use AES if V == 4.
1143 1139 this->m->encp->cf_string = e_aes;
... ...
libqpdf/QPDF_optimization.cc
... ... @@ -266,15 +266,14 @@ QPDF::pushInheritedAttributesToPageInternal(
266 266 "Pages object",
267 267 cur_pages.getObjectID(),
268 268 cur_pages.getGeneration());
269   - warn(QPDFExc(
  269 + warn(
270 270 qpdf_e_pages,
271   - this->m->file->getName(),
272 271 this->m->last_object_description,
273 272 0,
274   - "Unknown key " + key +
275   - " in /Pages object"
276   - " is being discarded as a result of"
277   - " flattening the /Pages tree"));
  273 + ("Unknown key " + key +
  274 + " in /Pages object"
  275 + " is being discarded as a result of"
  276 + " flattening the /Pages tree"));
278 277 }
279 278 }
280 279 }
... ...
libqpdf/QPDF_pages.cc
... ... @@ -130,12 +130,11 @@ QPDF::getAllPagesInternal(
130 130 }
131 131  
132 132 if (!cur_node.isDictionaryOfType(wanted_type)) {
133   - warn(QPDFExc(
  133 + warn(
134 134 qpdf_e_damaged_pdf,
135   - this->m->file->getName(),
136 135 "page tree node",
137 136 this->m->file->getLastOffset(),
138   - "/Type key should be " + wanted_type + " but is not; overriding"));
  137 + "/Type key should be " + wanted_type + " but is not; overriding");
139 138 cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
140 139 }
141 140 visited.erase(this_og);
... ...
libqpdf/qpdf/QPDF_Stream.hh
... ... @@ -90,7 +90,10 @@ class QPDF_Stream: public QPDFObject
90 90 std::vector<std::shared_ptr<QPDFStreamFilter>>& filters,
91 91 bool& specialized_compression,
92 92 bool& lossy_compression);
93   - void warn(QPDFExc const& e);
  93 + void warn(
  94 + qpdf_error_code_e error_code,
  95 + qpdf_offset_t offset,
  96 + std::string const& message);
94 97 void setDictDescription();
95 98 void setStreamDescription();
96 99  
... ...
qpdf/qtest/qpdf/split-content-stream-errors.out
... ... @@ -4,7 +4,7 @@ File is not encrypted
4 4 File is not linearized
5 5 WARNING: split-content-stream-errors.pdf (offset 557): error decoding stream data for object 6 0: LZWDecoder: bad code received
6 6 WARNING: split-content-stream-errors.pdf (offset 557): stream will be re-processed without filtering to avoid data loss
7   -WARNING: page object 3 0 (item index 0 (from 0)): ignoring non-stream in an array of streams
  7 +WARNING: page object 3 0: item index 0 (from 0): ignoring non-stream in an array of streams
8 8 WARNING: split-content-stream-errors.pdf (offset 557): error decoding stream data for object 6 0: LZWDecoder: bad code received
9 9 ERROR: page 1: content stream (content stream object 6 0): errors while decoding content stream
10 10 qpdf: errors detected
... ...