Commit 83897e8789acc07c7bc72f24ab1aff7ababaca66

Authored by m-holger
1 parent 9f0cc086

Split QPDF.cc into QPDF.cc and QPDF_objects.cc

Move methods responsible for loading or keeping track of objects to
QPDF_objects.cc.
libqpdf/CMakeLists.txt
@@ -107,6 +107,7 @@ set(libqpdf_SOURCES @@ -107,6 +107,7 @@ set(libqpdf_SOURCES
107 QPDF_encryption.cc 107 QPDF_encryption.cc
108 QPDF_json.cc 108 QPDF_json.cc
109 QPDF_linearization.cc 109 QPDF_linearization.cc
  110 + QPDF_objects.cc
110 QPDF_optimization.cc 111 QPDF_optimization.cc
111 QPDF_pages.cc 112 QPDF_pages.cc
112 QTC.cc 113 QTC.cc
libqpdf/QPDF.cc
@@ -2,10 +2,8 @@ @@ -2,10 +2,8 @@
2 2
3 #include <qpdf/QPDF_private.hh> 3 #include <qpdf/QPDF_private.hh>
4 4
5 -#include <array>  
6 #include <atomic> 5 #include <atomic>
7 #include <cstring> 6 #include <cstring>
8 -#include <limits>  
9 #include <map> 7 #include <map>
10 #include <regex> 8 #include <regex>
11 #include <sstream> 9 #include <sstream>
@@ -409,17 +407,6 @@ QPDF::findHeader() @@ -409,17 +407,6 @@ QPDF::findHeader()
409 return valid; 407 return valid;
410 } 408 }
411 409
412 -bool  
413 -QPDF::findStartxref()  
414 -{  
415 - if (readToken(*m->file).isWord("startxref") && readToken(*m->file).isInteger()) {  
416 - // Position in front of offset token  
417 - m->file->seek(m->file->getLastOffset(), SEEK_SET);  
418 - return true;  
419 - }  
420 - return false;  
421 -}  
422 -  
423 void 410 void
424 QPDF::parse(char const* password) 411 QPDF::parse(char const* password)
425 { 412 {
@@ -464,1019 +451,18 @@ QPDF::warn(QPDFExc const& e) @@ -464,1019 +451,18 @@ QPDF::warn(QPDFExc const& e)
464 } 451 }
465 m->warnings.push_back(e); 452 m->warnings.push_back(e);
466 if (!m->suppress_warnings) { 453 if (!m->suppress_warnings) {
467 - *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";  
468 - }  
469 -}  
470 -  
471 -void  
472 -QPDF::warn(  
473 - qpdf_error_code_e error_code,  
474 - std::string const& object,  
475 - qpdf_offset_t offset,  
476 - std::string const& message)  
477 -{  
478 - warn(QPDFExc(error_code, getFilename(), object, offset, message));  
479 -}  
480 -  
481 -void  
482 -QPDF::Xref_table::initialize_empty()  
483 -{  
484 - initialized_ = true;  
485 - trailer_ = QPDFObjectHandle::newDictionary();  
486 - auto rt = qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary());  
487 - auto pgs = qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary());  
488 - pgs.replaceKey("/Type", QPDFObjectHandle::newName("/Pages"));  
489 - pgs.replaceKey("/Kids", QPDFObjectHandle::newArray());  
490 - pgs.replaceKey("/Count", QPDFObjectHandle::newInteger(0));  
491 - rt.replaceKey("/Type", QPDFObjectHandle::newName("/Catalog"));  
492 - rt.replaceKey("/Pages", pgs);  
493 - trailer_.replaceKey("/Root", rt);  
494 - trailer_.replaceKey("/Size", QPDFObjectHandle::newInteger(3));  
495 -}  
496 -  
497 -void  
498 -QPDF::Xref_table::initialize_json()  
499 -{  
500 - initialized_ = true;  
501 - table.resize(1);  
502 - trailer_ = QPDFObjectHandle::newDictionary();  
503 - trailer_.replaceKey("/Size", QPDFObjectHandle::newInteger(1));  
504 -}  
505 -  
506 -void  
507 -QPDF::Xref_table::initialize()  
508 -{  
509 - // PDF spec says %%EOF must be found within the last 1024 bytes of/ the file. We add an extra  
510 - // 30 characters to leave room for the startxref stuff.  
511 - file->seek(0, SEEK_END);  
512 - qpdf_offset_t end_offset = file->tell();  
513 - // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic  
514 - // scenarios at least 3 bytes are required.  
515 - if (max_id_ > end_offset / 3) {  
516 - max_id_ = static_cast<int>(end_offset / 3);  
517 - }  
518 - qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);  
519 - PatternFinder sf(qpdf, &QPDF::findStartxref);  
520 - qpdf_offset_t xref_offset = 0;  
521 - if (file->findLast("startxref", start_offset, 0, sf)) {  
522 - xref_offset = QUtil::string_to_ll(read_token().getValue().c_str());  
523 - }  
524 -  
525 - try {  
526 - if (xref_offset == 0) {  
527 - QTC::TC("qpdf", "QPDF can't find startxref");  
528 - throw damaged_pdf("can't find startxref");  
529 - }  
530 - try {  
531 - read(xref_offset);  
532 - } catch (QPDFExc&) {  
533 - throw;  
534 - } catch (std::exception& e) {  
535 - throw damaged_pdf(std::string("error reading xref: ") + e.what());  
536 - }  
537 - } catch (QPDFExc& e) {  
538 - if (attempt_recovery_) {  
539 - reconstruct(e);  
540 - QTC::TC("qpdf", "QPDF reconstructed xref table");  
541 - } else {  
542 - throw;  
543 - }  
544 - }  
545 -  
546 - initialized_ = true;  
547 -}  
548 -  
549 -void  
550 -QPDF::Xref_table::reconstruct(QPDFExc& e)  
551 -{  
552 - if (reconstructed_) {  
553 - // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because  
554 - // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now.  
555 - throw e;  
556 - }  
557 -  
558 - // If recovery generates more than 1000 warnings, the file is so severely damaged that there  
559 - // probably is no point trying to continue.  
560 - const auto max_warnings = qpdf.m->warnings.size() + 1000U;  
561 - auto check_warnings = [this, max_warnings]() {  
562 - if (qpdf.m->warnings.size() > max_warnings) {  
563 - throw damaged_pdf("too many errors while reconstructing cross-reference table");  
564 - }  
565 - };  
566 -  
567 - reconstructed_ = true;  
568 - // We may find more objects, which may contain dangling references.  
569 - qpdf.m->fixed_dangling_refs = false;  
570 -  
571 - warn_damaged("file is damaged");  
572 - qpdf.warn(e);  
573 - warn_damaged("Attempting to reconstruct cross-reference table");  
574 -  
575 - // Delete all references to type 1 (uncompressed) objects  
576 - for (auto& iter: table) {  
577 - if (iter.type() == 1) {  
578 - iter = {};  
579 - }  
580 - }  
581 -  
582 - std::vector<std::tuple<int, int, qpdf_offset_t>> objects;  
583 - std::vector<qpdf_offset_t> trailers;  
584 - int max_found = 0;  
585 -  
586 - file->seek(0, SEEK_END);  
587 - qpdf_offset_t eof = file->tell();  
588 - file->seek(0, SEEK_SET);  
589 - // Don't allow very long tokens here during recovery. All the interesting tokens are covered.  
590 - static size_t const MAX_LEN = 10;  
591 - while (file->tell() < eof) {  
592 - QPDFTokenizer::Token t1 = read_token(MAX_LEN);  
593 - qpdf_offset_t token_start = file->tell() - toO(t1.getValue().length());  
594 - if (t1.isInteger()) {  
595 - auto pos = file->tell();  
596 - QPDFTokenizer::Token t2 = read_token(MAX_LEN);  
597 - if (t2.isInteger() && read_token(MAX_LEN).isWord("obj")) {  
598 - int obj = QUtil::string_to_int(t1.getValue().c_str());  
599 - int gen = QUtil::string_to_int(t2.getValue().c_str());  
600 - if (obj <= max_id_) {  
601 - objects.emplace_back(obj, gen, token_start);  
602 - if (obj > max_found) {  
603 - max_found = obj;  
604 - }  
605 - } else {  
606 - warn_damaged("ignoring object with impossibly large id " + std::to_string(obj));  
607 - }  
608 - }  
609 - file->seek(pos, SEEK_SET);  
610 - } else if (!trailer_ && t1.isWord("trailer")) {  
611 - trailers.emplace_back(file->tell());  
612 - }  
613 - file->findAndSkipNextEOL();  
614 - }  
615 -  
616 - table.resize(toS(max_found) + 1);  
617 -  
618 - for (auto tr: trailers) {  
619 - file->seek(tr, SEEK_SET);  
620 - auto t = read_trailer();  
621 - if (!t.isDictionary()) {  
622 - // Oh well. It was worth a try.  
623 - } else {  
624 - trailer_ = t;  
625 - break;  
626 - }  
627 - check_warnings();  
628 - }  
629 -  
630 - auto rend = objects.rend();  
631 - for (auto it = objects.rbegin(); it != rend; it++) {  
632 - auto [obj, gen, token_start] = *it;  
633 - insert(obj, 1, token_start, gen);  
634 - check_warnings();  
635 - }  
636 -  
637 - if (!trailer_) {  
638 - qpdf_offset_t max_offset{0};  
639 - // If there are any xref streams, take the last one to appear.  
640 - int i = -1;  
641 - for (auto const& item: table) {  
642 - ++i;  
643 - if (item.type() != 1) {  
644 - continue;  
645 - }  
646 - auto oh = qpdf.getObject(i, item.gen());  
647 - try {  
648 - if (!oh.isStreamOfType("/XRef")) {  
649 - continue;  
650 - }  
651 - } catch (std::exception&) {  
652 - continue;  
653 - }  
654 - auto offset = item.offset();  
655 - if (offset > max_offset) {  
656 - max_offset = offset;  
657 - trailer_ = oh.getDict();  
658 - }  
659 - check_warnings();  
660 - }  
661 - if (max_offset > 0) {  
662 - try {  
663 - read(max_offset);  
664 - } catch (std::exception&) {  
665 - throw damaged_pdf(  
666 - "error decoding candidate xref stream while recovering damaged file");  
667 - }  
668 - QTC::TC("qpdf", "QPDF recover xref stream");  
669 - }  
670 - }  
671 -  
672 - if (!trailer_) {  
673 - // We could check the last encountered object to see if it was an xref stream. If so, we  
674 - // could try to get the trailer from there. This may make it possible to recover files with  
675 - // bad startxref pointers even when they have object streams.  
676 -  
677 - throw damaged_pdf("unable to find trailer dictionary while recovering damaged file");  
678 - }  
679 - if (table.empty()) {  
680 - // We cannot check for an empty xref table in parse because empty tables are valid when  
681 - // creating QPDF objects from JSON.  
682 - throw damaged_pdf("unable to find objects while recovering damaged file");  
683 - }  
684 - check_warnings();  
685 - if (!initialized_) {  
686 - initialized_ = true;  
687 - qpdf.getAllPages();  
688 - check_warnings();  
689 - if (qpdf.m->all_pages.empty()) {  
690 - initialized_ = false;  
691 - throw damaged_pdf("unable to find any pages while recovering damaged file");  
692 - }  
693 - }  
694 - // We could iterate through the objects looking for streams and try to find objects inside of  
695 - // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors  
696 - // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything  
697 - // that involved looking at stream contents, we'd also have to call initializeEncryption() here.  
698 - // It's safe to call it more than once.  
699 -}  
700 -  
701 -void  
702 -QPDF::Xref_table::read(qpdf_offset_t xref_offset)  
703 -{  
704 - std::map<int, int> free_table;  
705 - std::set<qpdf_offset_t> visited;  
706 - while (xref_offset) {  
707 - visited.insert(xref_offset);  
708 - char buf[7];  
709 - memset(buf, 0, sizeof(buf));  
710 - file->seek(xref_offset, SEEK_SET);  
711 - // Some files miss the mark a little with startxref. We could do a better job of searching  
712 - // in the neighborhood for something that looks like either an xref table or stream, but the  
713 - // simple heuristic of skipping whitespace can help with the xref table case and is harmless  
714 - // with the stream case.  
715 - bool done = false;  
716 - bool skipped_space = false;  
717 - while (!done) {  
718 - char ch;  
719 - if (1 == file->read(&ch, 1)) {  
720 - if (QUtil::is_space(ch)) {  
721 - skipped_space = true;  
722 - } else {  
723 - file->unreadCh(ch);  
724 - done = true;  
725 - }  
726 - } else {  
727 - QTC::TC("qpdf", "QPDF eof skipping spaces before xref", skipped_space ? 0 : 1);  
728 - done = true;  
729 - }  
730 - }  
731 -  
732 - file->read(buf, sizeof(buf) - 1);  
733 - // The PDF spec says xref must be followed by a line terminator, but files exist in the wild  
734 - // where it is terminated by arbitrary whitespace.  
735 - if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) {  
736 - if (skipped_space) {  
737 - QTC::TC("qpdf", "QPDF xref skipped space");  
738 - warn_damaged("extraneous whitespace seen before xref");  
739 - }  
740 - QTC::TC(  
741 - "qpdf",  
742 - "QPDF xref space",  
743 - ((buf[4] == '\n') ? 0  
744 - : (buf[4] == '\r') ? 1  
745 - : (buf[4] == ' ') ? 2  
746 - : 9999));  
747 - int skip = 4;  
748 - // buf is null-terminated, and QUtil::is_space('\0') is false, so this won't overrun.  
749 - while (QUtil::is_space(buf[skip])) {  
750 - ++skip;  
751 - }  
752 - xref_offset = process_section(xref_offset + skip);  
753 - } else {  
754 - xref_offset = read_stream(xref_offset);  
755 - }  
756 - if (visited.count(xref_offset) != 0) {  
757 - QTC::TC("qpdf", "QPDF xref loop");  
758 - throw damaged_pdf("loop detected following xref tables");  
759 - }  
760 - }  
761 -  
762 - if (!trailer_) {  
763 - throw damaged_pdf("unable to find trailer while reading xref");  
764 - }  
765 - int size = trailer_.getKey("/Size").getIntValueAsInt();  
766 -  
767 - if (size < 3) {  
768 - throw damaged_pdf("too few objects - file can't have a page tree");  
769 - }  
770 -  
771 - // We are no longer reporting what the highest id in the xref table is. I don't think it adds  
772 - // anything. If we want to report more detail, we should report the total number of missing  
773 - // entries, including missing entries before the last actual entry.  
774 -}  
775 -  
776 -QPDF::Xref_table::Subsection  
777 -QPDF::Xref_table::subsection(std::string const& line)  
778 -{  
779 - auto terminate = [this]() -> void {  
780 - QTC::TC("qpdf", "QPDF invalid xref");  
781 - throw damaged_table("xref syntax invalid");  
782 - };  
783 -  
784 - // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated  
785 - // buffer.  
786 - char const* p = line.c_str();  
787 - char const* start = line.c_str();  
788 -  
789 - // Skip zero or more spaces  
790 - while (QUtil::is_space(*p)) {  
791 - ++p;  
792 - }  
793 - // Require digit  
794 - if (!QUtil::is_digit(*p)) {  
795 - terminate();  
796 - }  
797 - // Gather digits  
798 - std::string obj_str;  
799 - while (QUtil::is_digit(*p)) {  
800 - obj_str.append(1, *p++);  
801 - }  
802 - // Require space  
803 - if (!QUtil::is_space(*p)) {  
804 - terminate();  
805 - }  
806 - // Skip spaces  
807 - while (QUtil::is_space(*p)) {  
808 - ++p;  
809 - }  
810 - // Require digit  
811 - if (!QUtil::is_digit(*p)) {  
812 - terminate();  
813 - }  
814 - // Gather digits  
815 - std::string num_str;  
816 - while (QUtil::is_digit(*p)) {  
817 - num_str.append(1, *p++);  
818 - }  
819 - // Skip any space including line terminators  
820 - while (QUtil::is_space(*p)) {  
821 - ++p;  
822 - }  
823 - auto obj = QUtil::string_to_int(obj_str.c_str());  
824 - auto count = QUtil::string_to_int(num_str.c_str());  
825 - if (obj > max_id() || count > max_id() || (obj + count) > max_id()) {  
826 - throw damaged_table("xref table subsection header contains impossibly large entry");  
827 - }  
828 - return {obj, count, file->getLastOffset() + toI(p - start)};  
829 -}  
830 -  
831 -std::vector<QPDF::Xref_table::Subsection>  
832 -QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)  
833 -{  
834 - std::vector<QPDF::Xref_table::Subsection> result;  
835 - file->seek(start, SEEK_SET);  
836 -  
837 - while (true) {  
838 - line.assign(50, '\0');  
839 - file->read(line.data(), line.size());  
840 - auto [obj, num, offset] = result.emplace_back(subsection(line));  
841 - file->seek(offset, SEEK_SET);  
842 - for (qpdf_offset_t i = obj; i - num < obj; ++i) {  
843 - if (!std::get<0>(read_entry())) {  
844 - QTC::TC("qpdf", "QPDF invalid xref entry");  
845 - throw damaged_table("invalid xref entry (obj=" + std::to_string(i) + ")");  
846 - }  
847 - }  
848 - qpdf_offset_t pos = file->tell();  
849 - if (read_token().isWord("trailer")) {  
850 - return result;  
851 - } else {  
852 - file->seek(pos, SEEK_SET);  
853 - }  
854 - }  
855 -}  
856 -  
857 -// Optimistically read and parse all subsection headers. If an error is encountered return the  
858 -// result of bad_subsections.  
859 -std::vector<QPDF::Xref_table::Subsection>  
860 -QPDF::Xref_table::subsections(std::string& line)  
861 -{  
862 - auto recovery_offset = file->tell();  
863 - try {  
864 - std::vector<QPDF::Xref_table::Subsection> result;  
865 -  
866 - while (true) {  
867 - line.assign(50, '\0');  
868 - file->read(line.data(), line.size());  
869 - auto& sub = result.emplace_back(subsection(line));  
870 - auto count = std::get<1>(sub);  
871 - auto offset = std::get<2>(sub);  
872 - file->seek(offset + 20 * toO(count) - 1, SEEK_SET);  
873 - file->read(line.data(), 1);  
874 - if (!(line[0] == '\n' || line[0] == '\n')) {  
875 - return bad_subsections(line, recovery_offset);  
876 - }  
877 - qpdf_offset_t pos = file->tell();  
878 - if (read_token().isWord("trailer")) {  
879 - return result;  
880 - } else {  
881 - file->seek(pos, SEEK_SET);  
882 - }  
883 - }  
884 - } catch (...) {  
885 - return bad_subsections(line, recovery_offset);  
886 - }  
887 -}  
888 -  
889 -// Returns (success, f1, f2, type).  
890 -std::tuple<bool, qpdf_offset_t, int, char>  
891 -QPDF::Xref_table::read_bad_entry()  
892 -{  
893 - qpdf_offset_t f1{0};  
894 - int f2{0};  
895 - char type{'\0'};  
896 - // Reposition after initial read attempt and reread.  
897 - file->seek(file->getLastOffset(), SEEK_SET);  
898 - auto line = file->readLine(30);  
899 -  
900 - // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated  
901 - // buffer.  
902 - char const* p = line.data();  
903 -  
904 - // Skip zero or more spaces. There aren't supposed to be any.  
905 - bool invalid = false;  
906 - while (QUtil::is_space(*p)) {  
907 - ++p;  
908 - QTC::TC("qpdf", "QPDF ignore first space in xref entry");  
909 - invalid = true;  
910 - }  
911 - // Require digit  
912 - if (!QUtil::is_digit(*p)) {  
913 - return {false, 0, 0, '\0'};  
914 - }  
915 - // Gather digits  
916 - std::string f1_str;  
917 - while (QUtil::is_digit(*p)) {  
918 - f1_str.append(1, *p++);  
919 - }  
920 - // Require space  
921 - if (!QUtil::is_space(*p)) {  
922 - return {false, 0, 0, '\0'};  
923 - }  
924 - if (QUtil::is_space(*(p + 1))) {  
925 - QTC::TC("qpdf", "QPDF ignore first extra space in xref entry");  
926 - invalid = true;  
927 - }  
928 - // Skip spaces  
929 - while (QUtil::is_space(*p)) {  
930 - ++p;  
931 - }  
932 - // Require digit  
933 - if (!QUtil::is_digit(*p)) {  
934 - return {false, 0, 0, '\0'};  
935 - }  
936 - // Gather digits  
937 - std::string f2_str;  
938 - while (QUtil::is_digit(*p)) {  
939 - f2_str.append(1, *p++);  
940 - }  
941 - // Require space  
942 - if (!QUtil::is_space(*p)) {  
943 - return {false, 0, 0, '\0'};  
944 - }  
945 - if (QUtil::is_space(*(p + 1))) {  
946 - QTC::TC("qpdf", "QPDF ignore second extra space in xref entry");  
947 - invalid = true;  
948 - }  
949 - // Skip spaces  
950 - while (QUtil::is_space(*p)) {  
951 - ++p;  
952 - }  
953 - if ((*p == 'f') || (*p == 'n')) {  
954 - type = *p;  
955 - } else {  
956 - return {false, 0, 0, '\0'};  
957 - }  
958 - if ((f1_str.length() != 10) || (f2_str.length() != 5)) {  
959 - QTC::TC("qpdf", "QPDF ignore length error xref entry");  
960 - invalid = true;  
961 - }  
962 -  
963 - if (invalid) {  
964 - qpdf.warn(damaged_table("accepting invalid xref table entry"));  
965 - }  
966 -  
967 - f1 = QUtil::string_to_ll(f1_str.c_str());  
968 - f2 = QUtil::string_to_int(f2_str.c_str());  
969 -  
970 - return {true, f1, f2, type};  
971 -}  
972 -  
973 -// Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return  
974 -// result. Returns (success, f1, f2, type).  
975 -std::tuple<bool, qpdf_offset_t, int, char>  
976 -QPDF::Xref_table::read_entry()  
977 -{  
978 - qpdf_offset_t f1{0};  
979 - int f2{0};  
980 - char type{'\0'};  
981 - std::array<char, 21> line;  
982 - f1 = 0;  
983 - f2 = 0;  
984 - if (file->read(line.data(), 20) != 20) {  
985 - // C++20: [[unlikely]]  
986 - return {false, 0, 0, '\0'};  
987 - }  
988 - line[20] = '\0';  
989 - char const* p = line.data();  
990 -  
991 - int f1_len = 0;  
992 - int f2_len = 0;  
993 -  
994 - // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated  
995 - // buffer.  
996 -  
997 - // Gather f1 digits. NB No risk of overflow as 9'999'999'999 < max long long.  
998 - while (*p == '0') {  
999 - ++f1_len;  
1000 - ++p;  
1001 - }  
1002 - while (QUtil::is_digit(*p) && f1_len++ < 10) {  
1003 - f1 *= 10;  
1004 - f1 += *p++ - '0';  
1005 - }  
1006 - // Require space  
1007 - if (!QUtil::is_space(*p++)) {  
1008 - // Entry doesn't start with space or digit.  
1009 - // C++20: [[unlikely]]  
1010 - return {false, 0, 0, '\0'};  
1011 - }  
1012 - // Gather digits. NB No risk of overflow as 99'999 < max int.  
1013 - while (*p == '0') {  
1014 - ++f2_len;  
1015 - ++p;  
1016 - }  
1017 - while (QUtil::is_digit(*p) && f2_len++ < 5) {  
1018 - f2 *= 10;  
1019 - f2 += static_cast<int>(*p++ - '0');  
1020 - }  
1021 - if (QUtil::is_space(*p++) && (*p == 'f' || *p == 'n')) {  
1022 - // C++20: [[likely]]  
1023 - type = *p;  
1024 - // No test for valid line[19].  
1025 - if (*(++p) && *(++p) && (*p == '\n' || *p == '\r') && f1_len == 10 && f2_len == 5) {  
1026 - // C++20: [[likely]]  
1027 - return {true, f1, f2, type};  
1028 - }  
1029 - }  
1030 - return read_bad_entry();  
1031 -}  
1032 -  
1033 -// Read a single cross-reference table section and associated trailer.  
1034 -qpdf_offset_t  
1035 -QPDF::Xref_table::process_section(qpdf_offset_t xref_offset)  
1036 -{  
1037 - file->seek(xref_offset, SEEK_SET);  
1038 - std::string line;  
1039 - auto subs = subsections(line);  
1040 -  
1041 - auto cur_trailer_offset = file->tell();  
1042 - auto cur_trailer = read_trailer();  
1043 - if (!cur_trailer.isDictionary()) {  
1044 - QTC::TC("qpdf", "QPDF missing trailer");  
1045 - throw qpdf.damagedPDF("", "expected trailer dictionary");  
1046 - }  
1047 -  
1048 - if (!trailer_) {  
1049 - unsigned int sz;  
1050 - trailer_ = cur_trailer;  
1051 -  
1052 - if (!trailer_.hasKey("/Size")) {  
1053 - QTC::TC("qpdf", "QPDF trailer lacks size");  
1054 - throw qpdf.damagedPDF("trailer", "trailer dictionary lacks /Size key");  
1055 - }  
1056 - if (!trailer_.getKey("/Size").getValueAsUInt(sz)) {  
1057 - QTC::TC("qpdf", "QPDF trailer size not integer");  
1058 - throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is not an integer");  
1059 - }  
1060 - if (sz >= static_cast<unsigned int>(max_id_)) {  
1061 - QTC::TC("qpdf", "QPDF trailer size impossibly large");  
1062 - throw qpdf.damagedPDF("trailer", "/Size key in trailer dictionary is impossibly large");  
1063 - }  
1064 - table.resize(sz);  
1065 - }  
1066 -  
1067 - for (auto [obj, num, offset]: subs) {  
1068 - file->seek(offset, SEEK_SET);  
1069 - for (qpdf_offset_t i = obj; i - num < obj; ++i) {  
1070 - if (i == 0) {  
1071 - // This is needed by checkLinearization()  
1072 - first_item_offset_ = file->tell();  
1073 - }  
1074 - // For xref_table, these will always be small enough to be ints  
1075 - auto [success, f1, f2, type] = read_entry();  
1076 - if (!success) {  
1077 - throw damaged_table("invalid xref entry (obj=" + std::to_string(i) + ")");  
1078 - }  
1079 - if (type == 'f') {  
1080 - insert_free(QPDFObjGen(toI(i), f2));  
1081 - } else {  
1082 - insert(toI(i), 1, f1, f2);  
1083 - }  
1084 - }  
1085 - qpdf_offset_t pos = file->tell();  
1086 - if (read_token().isWord("trailer")) {  
1087 - break;  
1088 - } else {  
1089 - file->seek(pos, SEEK_SET);  
1090 - }  
1091 - }  
1092 -  
1093 - if (cur_trailer.hasKey("/XRefStm")) {  
1094 - if (ignore_streams_) {  
1095 - QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");  
1096 - } else {  
1097 - if (cur_trailer.getKey("/XRefStm").isInteger()) {  
1098 - // Read the xref stream but disregard any return value -- we'll use our trailer's  
1099 - // /Prev key instead of the xref stream's.  
1100 - (void)read_stream(cur_trailer.getKey("/XRefStm").getIntValue());  
1101 - } else {  
1102 - throw qpdf.damagedPDF("xref stream", cur_trailer_offset, "invalid /XRefStm");  
1103 - }  
1104 - }  
1105 - }  
1106 -  
1107 - if (cur_trailer.hasKey("/Prev")) {  
1108 - if (!cur_trailer.getKey("/Prev").isInteger()) {  
1109 - QTC::TC("qpdf", "QPDF trailer prev not integer");  
1110 - throw qpdf.damagedPDF(  
1111 - "trailer", cur_trailer_offset, "/Prev key in trailer dictionary is not an integer");  
1112 - }  
1113 - QTC::TC("qpdf", "QPDF prev key in trailer dictionary");  
1114 - return cur_trailer.getKey("/Prev").getIntValue();  
1115 - }  
1116 -  
1117 - return 0;  
1118 -}  
1119 -  
1120 -// Read a single cross-reference stream.  
1121 -qpdf_offset_t  
1122 -QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset)  
1123 -{  
1124 - if (!ignore_streams_) {  
1125 - QPDFObjGen x_og;  
1126 - QPDFObjectHandle xref_obj;  
1127 - try {  
1128 - xref_obj = qpdf.readObjectAtOffset(  
1129 - false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og, true);  
1130 - } catch (QPDFExc&) {  
1131 - // ignore -- report error below  
1132 - }  
1133 - if (xref_obj.isStreamOfType("/XRef")) {  
1134 - QTC::TC("qpdf", "QPDF found xref stream");  
1135 - return process_stream(xref_offset, xref_obj);  
1136 - }  
1137 - }  
1138 -  
1139 - QTC::TC("qpdf", "QPDF can't find xref");  
1140 - throw qpdf.damagedPDF("", xref_offset, "xref not found");  
1141 - return 0; // unreachable  
1142 -}  
1143 -  
1144 -// Return the entry size of the xref stream and the processed W array.  
1145 -std::pair<int, std::array<int, 3>>  
1146 -QPDF::Xref_table::process_W(  
1147 - QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)  
1148 -{  
1149 - auto W_obj = dict.getKey("/W");  
1150 - if (!(W_obj.isArray() && W_obj.getArrayNItems() >= 3 && W_obj.getArrayItem(0).isInteger() &&  
1151 - W_obj.getArrayItem(1).isInteger() && W_obj.getArrayItem(2).isInteger())) {  
1152 - throw damaged("Cross-reference stream does not have a proper /W key");  
1153 - }  
1154 -  
1155 - std::array<int, 3> W;  
1156 - int entry_size = 0;  
1157 - auto w_vector = W_obj.getArrayAsVector();  
1158 - int max_bytes = sizeof(qpdf_offset_t);  
1159 - for (size_t i = 0; i < 3; ++i) {  
1160 - W[i] = w_vector[i].getIntValueAsInt();  
1161 - if (W[i] > max_bytes) {  
1162 - throw damaged("Cross-reference stream's /W contains impossibly large values");  
1163 - }  
1164 - if (W[i] < 0) {  
1165 - throw damaged("Cross-reference stream's /W contains negative values");  
1166 - }  
1167 - entry_size += W[i];  
1168 - }  
1169 - if (entry_size == 0) {  
1170 - throw damaged("Cross-reference stream's /W indicates entry size of 0");  
1171 - }  
1172 - return {entry_size, W};  
1173 -}  
1174 -  
1175 -// Validate Size entry and return the maximum number of entries that the xref stream can contain and  
1176 -// the value of the Size entry.  
1177 -std::pair<int, size_t>  
1178 -QPDF::Xref_table::process_Size(  
1179 - QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged)  
1180 -{  
1181 - // Number of entries is limited by the highest possible object id and stream size.  
1182 - auto max_num_entries = std::numeric_limits<int>::max();  
1183 - if (max_num_entries > (std::numeric_limits<qpdf_offset_t>::max() / entry_size)) {  
1184 - max_num_entries = toI(std::numeric_limits<qpdf_offset_t>::max() / entry_size);  
1185 - }  
1186 -  
1187 - auto Size_obj = dict.getKey("/Size");  
1188 - long long size;  
1189 - if (!dict.getKey("/Size").getValueAsInt(size)) {  
1190 - throw damaged("Cross-reference stream does not have a proper /Size key");  
1191 - } else if (size < 0) {  
1192 - throw damaged("Cross-reference stream has a negative /Size key");  
1193 - } else if (size >= max_num_entries) {  
1194 - throw damaged("Cross-reference stream has an impossibly large /Size key");  
1195 - }  
1196 - // We are not validating that Size <= (Size key of parent xref / trailer).  
1197 - return {max_num_entries, toS(size)};  
1198 -}  
1199 -  
1200 -// Return the number of entries of the xref stream and the processed Index array.  
1201 -std::pair<int, std::vector<std::pair<int, int>>>  
1202 -QPDF::Xref_table::process_Index(  
1203 - QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged)  
1204 -{  
1205 - auto size = dict.getKey("/Size").getIntValueAsInt();  
1206 - auto Index_obj = dict.getKey("/Index");  
1207 -  
1208 - if (Index_obj.isArray()) {  
1209 - std::vector<std::pair<int, int>> indx;  
1210 - int num_entries = 0;  
1211 - auto index_vec = Index_obj.getArrayAsVector();  
1212 - if ((index_vec.size() % 2) || index_vec.size() < 2) {  
1213 - throw damaged("Cross-reference stream's /Index has an invalid number of values");  
1214 - }  
1215 -  
1216 - int i = 0;  
1217 - long long first = 0;  
1218 - for (auto& val: index_vec) {  
1219 - if (val.isInteger()) {  
1220 - if (i % 2) {  
1221 - auto count = val.getIntValue();  
1222 - if (count <= 0) {  
1223 - throw damaged(  
1224 - "Cross-reference stream section claims to contain " +  
1225 - std::to_string(count) + " entries");  
1226 - }  
1227 - // We are guarding against the possibility of num_entries * entry_size  
1228 - // overflowing. We are not checking that entries are in ascending order as  
1229 - // required by the spec, which probably should generate a warning. We are also  
1230 - // not checking that for each subsection first object number + number of entries  
1231 - // <= /Size. The spec requires us to ignore object number > /Size.  
1232 - if (first > (max_num_entries - count) ||  
1233 - count > (max_num_entries - num_entries)) {  
1234 - throw damaged(  
1235 - "Cross-reference stream claims to contain too many entries: " +  
1236 - std::to_string(first) + " " + std::to_string(max_num_entries) + " " +  
1237 - std::to_string(num_entries));  
1238 - }  
1239 - indx.emplace_back(static_cast<int>(first), static_cast<int>(count));  
1240 - num_entries += static_cast<int>(count);  
1241 - } else {  
1242 - first = val.getIntValue();  
1243 - if (first < 0) {  
1244 - throw damaged(  
1245 - "Cross-reference stream's /Index contains a negative object id");  
1246 - } else if (first > max_num_entries) {  
1247 - throw damaged("Cross-reference stream's /Index contains an impossibly "  
1248 - "large object id");  
1249 - }  
1250 - }  
1251 - } else {  
1252 - throw damaged(  
1253 - "Cross-reference stream's /Index's item " + std::to_string(i) +  
1254 - " is not an integer");  
1255 - }  
1256 - i++;  
1257 - }  
1258 - QTC::TC("qpdf", "QPDF xref /Index is array", index_vec.size() == 2 ? 0 : 1);  
1259 - return {num_entries, indx};  
1260 - } else if (Index_obj.isNull()) {  
1261 - QTC::TC("qpdf", "QPDF xref /Index is null");  
1262 - return {size, {{0, size}}};  
1263 - } else {  
1264 - throw damaged("Cross-reference stream does not have a proper /Index key");  
1265 - }  
1266 -}  
1267 -  
1268 -qpdf_offset_t  
1269 -QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)  
1270 -{  
1271 - auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc {  
1272 - return qpdf.damagedPDF("xref stream", xref_offset, msg.data());  
1273 - };  
1274 -  
1275 - auto dict = xref_obj.getDict();  
1276 -  
1277 - auto [entry_size, W] = process_W(dict, damaged);  
1278 - auto [max_num_entries, size] = process_Size(dict, entry_size, damaged);  
1279 - auto [num_entries, indx] = process_Index(dict, max_num_entries, damaged);  
1280 -  
1281 - std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized);  
1282 - size_t actual_size = bp->getSize();  
1283 - auto expected_size = toS(entry_size) * toS(num_entries);  
1284 -  
1285 - if (expected_size != actual_size) {  
1286 - QPDFExc x = damaged(  
1287 - "Cross-reference stream data has the wrong size; expected = " +  
1288 - std::to_string(expected_size) + "; actual = " + std::to_string(actual_size));  
1289 - if (expected_size > actual_size) {  
1290 - throw x;  
1291 - } else {  
1292 - qpdf.warn(x);  
1293 - }  
1294 - }  
1295 -  
1296 - if (!trailer_) {  
1297 - trailer_ = dict;  
1298 - if (size > toS(max_id_)) {  
1299 - throw damaged("Cross-reference stream /Size entry is impossibly large");  
1300 - }  
1301 - table.resize(size);  
1302 - }  
1303 -  
1304 - bool saw_first_compressed_object = false;  
1305 -  
1306 - // Actual size vs. expected size check above ensures that we will not overflow any buffers here.  
1307 - // We know that entry_size * num_entries is less or equal to the size of the buffer.  
1308 - auto p = bp->getBuffer();  
1309 - for (auto [obj, sec_entries]: indx) {  
1310 - // Process a subsection.  
1311 - for (int i = 0; i < sec_entries; ++i) {  
1312 - // Read this entry  
1313 - std::array<qpdf_offset_t, 3> fields{};  
1314 - if (W[0] == 0) {  
1315 - QTC::TC("qpdf", "QPDF default for xref stream field 0");  
1316 - fields[0] = 1;  
1317 - }  
1318 - for (size_t j = 0; j < 3; ++j) {  
1319 - for (int k = 0; k < W[j]; ++k) {  
1320 - fields[j] <<= 8;  
1321 - fields[j] |= *p++;  
1322 - }  
1323 - }  
1324 -  
1325 - // Get the generation number. The generation number is 0 unless this is an uncompressed  
1326 - // object record, in which case the generation number appears as the third field.  
1327 - if (saw_first_compressed_object) {  
1328 - if (fields[0] != 2) {  
1329 - uncompressed_after_compressed_ = true;  
1330 - }  
1331 - } else if (fields[0] == 2) {  
1332 - saw_first_compressed_object = true;  
1333 - }  
1334 - if (obj == 0) {  
1335 - // This is needed by checkLinearization()  
1336 - first_item_offset_ = xref_offset;  
1337 - } else if (fields[0] == 0) {  
1338 - // Ignore fields[2], which we don't care about in this case. This works around the  
1339 - // issue of some PDF files that put invalid values, like -1, here for deleted  
1340 - // objects.  
1341 - insert_free(QPDFObjGen(obj, 0));  
1342 - } else {  
1343 - insert(obj, toI(fields[0]), fields[1], toI(fields[2]));  
1344 - }  
1345 - ++obj;  
1346 - }  
1347 - }  
1348 -  
1349 - if (dict.hasKey("/Prev")) {  
1350 - if (!dict.getKey("/Prev").isInteger()) {  
1351 - throw qpdf.damagedPDF(  
1352 - "xref stream", "/Prev key in xref stream dictionary is not an integer");  
1353 - }  
1354 - QTC::TC("qpdf", "QPDF prev key in xref stream dictionary");  
1355 - return dict.getKey("/Prev").getIntValue();  
1356 - } else {  
1357 - return 0;  
1358 - }  
1359 -}  
1360 -  
1361 -void  
1362 -QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)  
1363 -{  
1364 - // Populate the xref table in such a way that the first reference to an object that we see,  
1365 - // which is the one in the latest xref table in which it appears, is the one that gets stored.  
1366 - // This works because we are reading more recent appends before older ones.  
1367 -  
1368 - // If there is already an entry for this object and generation in the table, it means that a  
1369 - // later xref table has registered this object. Disregard this one.  
1370 -  
1371 - int new_gen = f0 == 2 ? 0 : f2;  
1372 -  
1373 - if (!(obj > 0 && static_cast<size_t>(obj) < table.size() && 0 <= f2 && new_gen < 65535)) {  
1374 - // We are ignoring invalid objgens. Most will arrive here from xref reconstruction. There  
1375 - // is probably no point having another warning but we could count invalid items in order to  
1376 - // decide when to give up.  
1377 - QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");  
1378 - return;  
1379 - }  
1380 -  
1381 - auto& entry = table[static_cast<size_t>(obj)];  
1382 - auto old_type = entry.type();  
1383 -  
1384 - if (!old_type && entry.gen() > 0) {  
1385 - // At the moment we are processing the updates last to first and therefore the gen doesn't  
1386 - // matter as long as it is > 0 to distinguish it from an uninitialized entry. This will need  
1387 - // to be revisited when we want to support incremental updates or more comprehensive  
1388 - // checking.  
1389 - QTC::TC("qpdf", "QPDF xref deleted object");  
1390 - return;  
1391 - }  
1392 -  
1393 - if (f0 == 2 && static_cast<int>(f1) == obj) {  
1394 - qpdf.warn(qpdf.damagedPDF(  
1395 - "xref stream", "self-referential object stream " + std::to_string(obj)));  
1396 - return;  
1397 - }  
1398 -  
1399 - if (old_type && entry.gen() >= new_gen) {  
1400 - QTC::TC("qpdf", "QPDF xref reused object");  
1401 - return;  
1402 - }  
1403 -  
1404 - switch (f0) {  
1405 - case 1:  
1406 - // f2 is generation  
1407 - QTC::TC("qpdf", "QPDF xref gen > 0", (f2 > 0) ? 1 : 0);  
1408 - entry = {f2, Uncompressed(f1)};  
1409 - break;  
1410 -  
1411 - case 2:  
1412 - entry = {0, Compressed(toI(f1), f2)};  
1413 - object_streams_ = true;  
1414 - break;  
1415 -  
1416 - default:  
1417 - throw qpdf.damagedPDF(  
1418 - "xref stream", "unknown xref stream entry type " + std::to_string(f0));  
1419 - break;  
1420 - }  
1421 -}  
1422 -  
1423 -void  
1424 -QPDF::Xref_table::insert_free(QPDFObjGen og)  
1425 -{  
1426 - // At the moment we are processing the updates last to first and therefore the gen doesn't  
1427 - // matter as long as it is > 0 to distinguish it from an uninitialized entry. This will need to be  
1428 - // revisited when we want to support incremental updates or more comprehensive checking.  
1429 - if (og.getObj() < 1) {  
1430 - return;  
1431 - }  
1432 - size_t id = static_cast<size_t>(og.getObj());  
1433 - if (id < table.size() && !type(id)) {  
1434 - table[id] = {1, {}};  
1435 - }  
1436 -}  
1437 -  
1438 -QPDFObjGen  
1439 -QPDF::Xref_table::at_offset(qpdf_offset_t offset) const noexcept  
1440 -{  
1441 - int id = 0;  
1442 - int gen = 0;  
1443 - qpdf_offset_t start = 0;  
1444 -  
1445 - int i = 0;  
1446 - for (auto const& item: table) {  
1447 - auto o = item.offset();  
1448 - if (start < o && o <= offset) {  
1449 - start = o;  
1450 - id = i;  
1451 - gen = item.gen();  
1452 - }  
1453 - ++i; 454 + *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";
1454 } 455 }
1455 - return QPDFObjGen(id, gen);  
1456 } 456 }
1457 457
1458 -std::map<QPDFObjGen, QPDFXRefEntry>  
1459 -QPDF::Xref_table::as_map() const  
1460 -{  
1461 - std::map<QPDFObjGen, QPDFXRefEntry> result;  
1462 - int i{0};  
1463 - for (auto const& item: table) {  
1464 - switch (item.type()) {  
1465 - case 0:  
1466 - break;  
1467 - case 1:  
1468 - result.emplace(QPDFObjGen(i, item.gen()), item.offset());  
1469 - break;  
1470 - case 2:  
1471 - result.emplace(  
1472 - QPDFObjGen(i, 0), QPDFXRefEntry(item.stream_number(), item.stream_index()));  
1473 - break;  
1474 - default:  
1475 - throw std::logic_error("Xref_table: invalid entry type");  
1476 - }  
1477 - ++i;  
1478 - }  
1479 - return result; 458 +void
  459 +QPDF::warn(
  460 + qpdf_error_code_e error_code,
  461 + std::string const& object,
  462 + qpdf_offset_t offset,
  463 + std::string const& message)
  464 +{
  465 + warn(QPDFExc(error_code, getFilename(), object, offset, message));
1480 } 466 }
1481 467
1482 void 468 void
@@ -1485,54 +471,6 @@ QPDF::showXRefTable() @@ -1485,54 +471,6 @@ QPDF::showXRefTable()
1485 m->xref_table.show(); 471 m->xref_table.show();
1486 } 472 }
1487 473
1488 -void  
1489 -QPDF::Xref_table::show()  
1490 -{  
1491 - auto& cout = *qpdf.m->log->getInfo();  
1492 - int i = -1;  
1493 - for (auto const& item: table) {  
1494 - ++i;  
1495 - if (item.type()) {  
1496 - cout << std::to_string(i) << "/" << std::to_string(item.gen()) << ": ";  
1497 - switch (item.type()) {  
1498 - case 1:  
1499 - cout << "uncompressed; offset = " << item.offset() << "\n";  
1500 - break;  
1501 -  
1502 - case 2:  
1503 - cout << "compressed; stream = " << item.stream_number()  
1504 - << ", index = " << item.stream_index() << "\n";  
1505 - break;  
1506 -  
1507 - default:  
1508 - throw std::logic_error(  
1509 - "unknown cross-reference table type while showing xref_table");  
1510 - }  
1511 - }  
1512 - }  
1513 -}  
1514 -  
1515 -// Resolve all objects in the xref table. If this triggers an xref table reconstruction, abort and  
1516 -// return false. Otherwise return true.  
1517 -bool  
1518 -QPDF::Xref_table::resolve()  
1519 -{  
1520 - bool may_change = !reconstructed_;  
1521 - int i = -1;  
1522 - for (auto& item: table) {  
1523 - ++i;  
1524 - if (item.type()) {  
1525 - if (qpdf.isUnresolved(QPDFObjGen(i, item.gen()))) {  
1526 - qpdf.resolve(QPDFObjGen(i, item.gen()));  
1527 - if (may_change && reconstructed_) {  
1528 - return false;  
1529 - }  
1530 - }  
1531 - }  
1532 - }  
1533 - return true;  
1534 -}  
1535 -  
1536 // Ensure all objects in the pdf file, including those in indirect references, appear in the object 474 // Ensure all objects in the pdf file, including those in indirect references, appear in the object
1537 // cache. 475 // cache.
1538 void 476 void
@@ -1562,18 +500,6 @@ QPDF::getObjectCount() @@ -1562,18 +500,6 @@ QPDF::getObjectCount()
1562 return toS(og.getObj()); 500 return toS(og.getObj());
1563 } 501 }
1564 502
1565 -std::vector<QPDFObjectHandle>  
1566 -QPDF::getAllObjects()  
1567 -{  
1568 - // After fixDanglingReferences is called, all objects are in the object cache.  
1569 - fixDanglingReferences();  
1570 - std::vector<QPDFObjectHandle> result;  
1571 - for (auto const& iter: m->obj_cache) {  
1572 - result.push_back(newIndirect(iter.first, iter.second.object));  
1573 - }  
1574 - return result;  
1575 -}  
1576 -  
1577 void 503 void
1578 QPDF::setLastObjectDescription(std::string const& description, QPDFObjGen const& og) 504 QPDF::setLastObjectDescription(std::string const& description, QPDFObjGen const& og)
1579 { 505 {
@@ -1589,220 +515,6 @@ QPDF::setLastObjectDescription(std::string const&amp; description, QPDFObjGen const&amp; @@ -1589,220 +515,6 @@ QPDF::setLastObjectDescription(std::string const&amp; description, QPDFObjGen const&amp;
1589 } 515 }
1590 } 516 }
1591 517
1592 -QPDFObjectHandle  
1593 -QPDF::Xref_table::read_trailer()  
1594 -{  
1595 - qpdf_offset_t offset = file->tell();  
1596 - bool empty = false;  
1597 - auto object = QPDFParser(*file, "trailer", tokenizer, nullptr, &qpdf, true).parse(empty, false);  
1598 - if (empty) {  
1599 - // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in  
1600 - // actual PDF files and Adobe Reader appears to ignore them.  
1601 - qpdf.warn(qpdf.damagedPDF("trailer", "empty object treated as null"));  
1602 - } else if (object.isDictionary() && read_token().isWord("stream")) {  
1603 - qpdf.warn(qpdf.damagedPDF("trailer", file->tell(), "stream keyword found in trailer"));  
1604 - }  
1605 - // Override last_offset so that it points to the beginning of the object we just read  
1606 - file->setLastOffset(offset);  
1607 - return object;  
1608 -}  
1609 -  
1610 -QPDFObjectHandle  
1611 -QPDF::readObject(std::string const& description, QPDFObjGen og)  
1612 -{  
1613 - setLastObjectDescription(description, og);  
1614 - qpdf_offset_t offset = m->file->tell();  
1615 - bool empty = false;  
1616 -  
1617 - StringDecrypter decrypter{this, og};  
1618 - StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr;  
1619 - auto object =  
1620 - QPDFParser(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, this, true)  
1621 - .parse(empty, false);  
1622 - if (empty) {  
1623 - // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in  
1624 - // actual PDF files and Adobe Reader appears to ignore them.  
1625 - warn(damagedPDF(*m->file, m->file->getLastOffset(), "empty object treated as null"));  
1626 - return object;  
1627 - }  
1628 - auto token = readToken(*m->file);  
1629 - if (object.isDictionary() && token.isWord("stream")) {  
1630 - readStream(object, og, offset);  
1631 - token = readToken(*m->file);  
1632 - }  
1633 - if (!token.isWord("endobj")) {  
1634 - QTC::TC("qpdf", "QPDF err expected endobj");  
1635 - warn(damagedPDF("expected endobj"));  
1636 - }  
1637 - return object;  
1638 -}  
1639 -  
1640 -// After reading stream dictionary and stream keyword, read rest of stream.  
1641 -void  
1642 -QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)  
1643 -{  
1644 - validateStreamLineEnd(object, og, offset);  
1645 -  
1646 - // Must get offset before accessing any additional objects since resolving a previously  
1647 - // unresolved indirect object will change file position.  
1648 - qpdf_offset_t stream_offset = m->file->tell();  
1649 - size_t length = 0;  
1650 -  
1651 - try {  
1652 - auto length_obj = object.getKey("/Length");  
1653 -  
1654 - if (!length_obj.isInteger()) {  
1655 - if (length_obj.isNull()) {  
1656 - QTC::TC("qpdf", "QPDF stream without length");  
1657 - throw damagedPDF(offset, "stream dictionary lacks /Length key");  
1658 - }  
1659 - QTC::TC("qpdf", "QPDF stream length not integer");  
1660 - throw damagedPDF(offset, "/Length key in stream dictionary is not an integer");  
1661 - }  
1662 -  
1663 - length = toS(length_obj.getUIntValue());  
1664 - // Seek in two steps to avoid potential integer overflow  
1665 - m->file->seek(stream_offset, SEEK_SET);  
1666 - m->file->seek(toO(length), SEEK_CUR);  
1667 - if (!readToken(*m->file).isWord("endstream")) {  
1668 - QTC::TC("qpdf", "QPDF missing endstream");  
1669 - throw damagedPDF("expected endstream");  
1670 - }  
1671 - } catch (QPDFExc& e) {  
1672 - if (m->attempt_recovery) {  
1673 - warn(e);  
1674 - length = recoverStreamLength(m->file_sp, og, stream_offset);  
1675 - } else {  
1676 - throw;  
1677 - }  
1678 - }  
1679 - object = {QPDF_Stream::create(this, og, object, stream_offset, length)};  
1680 -}  
1681 -  
1682 -void  
1683 -QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)  
1684 -{  
1685 - // The PDF specification states that the word "stream" should be followed by either a carriage  
1686 - // return and a newline or by a newline alone. It specifically disallowed following it by a  
1687 - // carriage return alone since, in that case, there would be no way to tell whether the NL in a  
1688 - // CR NL sequence was part of the stream data. However, some readers, including Adobe reader,  
1689 - // accept a carriage return by itself when followed by a non-newline character, so that's what  
1690 - // we do here. We have also seen files that have extraneous whitespace between the stream  
1691 - // keyword and the newline.  
1692 - while (true) {  
1693 - char ch;  
1694 - if (m->file->read(&ch, 1) == 0) {  
1695 - // A premature EOF here will result in some other problem that will get reported at  
1696 - // another time.  
1697 - return;  
1698 - }  
1699 - if (ch == '\n') {  
1700 - // ready to read stream data  
1701 - QTC::TC("qpdf", "QPDF stream with NL only");  
1702 - return;  
1703 - }  
1704 - if (ch == '\r') {  
1705 - // Read another character  
1706 - if (m->file->read(&ch, 1) != 0) {  
1707 - if (ch == '\n') {  
1708 - // Ready to read stream data  
1709 - QTC::TC("qpdf", "QPDF stream with CRNL");  
1710 - } else {  
1711 - // Treat the \r by itself as the whitespace after the stream keyword and start reading  
1712 - // stream data in spite of not having seen a newline.  
1713 - QTC::TC("qpdf", "QPDF stream with CR only");  
1714 - m->file->unreadCh(ch);  
1715 - warn(damagedPDF(  
1716 - m->file->tell(), "stream keyword followed by carriage return only"));  
1717 - }  
1718 - }  
1719 - return;  
1720 - }  
1721 - if (!QUtil::is_space(ch)) {  
1722 - QTC::TC("qpdf", "QPDF stream without newline");  
1723 - m->file->unreadCh(ch);  
1724 - warn(damagedPDF(  
1725 - m->file->tell(), "stream keyword not followed by proper line terminator"));  
1726 - return;  
1727 - }  
1728 - warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace"));  
1729 - }  
1730 -}  
1731 -  
1732 -QPDFObjectHandle  
1733 -QPDF::readObjectInStream(std::shared_ptr<InputSource>& input, int obj)  
1734 -{  
1735 - m->last_object_description.erase(7); // last_object_description starts with "object "  
1736 - m->last_object_description += std::to_string(obj);  
1737 - m->last_object_description += " 0";  
1738 -  
1739 - bool empty = false;  
1740 - auto object = QPDFParser(*input, m->last_object_description, m->tokenizer, nullptr, this, true)  
1741 - .parse(empty, false);  
1742 - if (empty) {  
1743 - // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in  
1744 - // actual PDF files and Adobe Reader appears to ignore them.  
1745 - warn(damagedPDF(*input, input->getLastOffset(), "empty object treated as null"));  
1746 - }  
1747 - return object;  
1748 -}  
1749 -  
1750 -bool  
1751 -QPDF::findEndstream()  
1752 -{  
1753 - // Find endstream or endobj. Position the input at that token.  
1754 - auto t = readToken(*m->file, 20);  
1755 - if (t.isWord("endobj") || t.isWord("endstream")) {  
1756 - m->file->seek(m->file->getLastOffset(), SEEK_SET);  
1757 - return true;  
1758 - }  
1759 - return false;  
1760 -}  
1761 -  
1762 -size_t  
1763 -QPDF::recoverStreamLength(  
1764 - std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset)  
1765 -{  
1766 - // Try to reconstruct stream length by looking for endstream or endobj  
1767 - warn(damagedPDF(*input, stream_offset, "attempting to recover stream length"));  
1768 -  
1769 - PatternFinder ef(*this, &QPDF::findEndstream);  
1770 - size_t length = 0;  
1771 - if (m->file->findFirst("end", stream_offset, 0, ef)) {  
1772 - length = toS(m->file->tell() - stream_offset);  
1773 - // Reread endstream but, if it was endobj, don't skip that.  
1774 - QPDFTokenizer::Token t = readToken(*m->file);  
1775 - if (t.getValue() == "endobj") {  
1776 - m->file->seek(m->file->getLastOffset(), SEEK_SET);  
1777 - }  
1778 - }  
1779 -  
1780 - if (length) {  
1781 - // Make sure this is inside this object  
1782 - auto found = m->xref_table.at_offset(stream_offset + toO(length));  
1783 - if (found == QPDFObjGen() || found == og) {  
1784 - // If we are trying to recover an XRef stream, the xref table will not contain any  
1785 - // entries, therefore we cannot check the found length. Otherwise we  
1786 - // found endstream\nendobj within the space allowed for this object, so we're probably  
1787 - // in good shape.  
1788 - } else {  
1789 - QTC::TC("qpdf", "QPDF found wrong endstream in recovery");  
1790 - length = 0;  
1791 - }  
1792 - }  
1793 -  
1794 - if (length == 0) {  
1795 - warn(damagedPDF(  
1796 - *input, stream_offset, "unable to recover stream data; treating stream as empty"));  
1797 - } else {  
1798 - warn(damagedPDF(  
1799 - *input, stream_offset, "recovered stream length: " + std::to_string(length)));  
1800 - }  
1801 -  
1802 - QTC::TC("qpdf", "QPDF recovered stream length");  
1803 - return length;  
1804 -}  
1805 -  
1806 QPDFTokenizer::Token 518 QPDFTokenizer::Token
1807 QPDF::readToken(InputSource& input, size_t max_len) 519 QPDF::readToken(InputSource& input, size_t max_len)
1808 { 520 {
@@ -1810,367 +522,12 @@ QPDF::readToken(InputSource&amp; input, size_t max_len) @@ -1810,367 +522,12 @@ QPDF::readToken(InputSource&amp; input, size_t max_len)
1810 } 522 }
1811 523
1812 QPDFObjectHandle 524 QPDFObjectHandle
1813 -QPDF::readObjectAtOffset(  
1814 - bool try_recovery,  
1815 - qpdf_offset_t offset,  
1816 - std::string const& description,  
1817 - QPDFObjGen exp_og,  
1818 - QPDFObjGen& og,  
1819 - bool skip_cache_if_in_xref)  
1820 -{  
1821 - bool check_og = true;  
1822 - if (exp_og.getObj() == 0) {  
1823 - // This method uses an expected object ID of 0 to indicate that we don't know or don't care  
1824 - // what the actual object ID is at this offset. This is true when we read the xref stream  
1825 - // and linearization hint streams. In this case, we don't verify the expected object  
1826 - // ID/generation against what was read from the file. There is also no reason to attempt  
1827 - // xref recovery if we get a failure in this case since the read attempt was not triggered  
1828 - // by an xref lookup.  
1829 - check_og = false;  
1830 - try_recovery = false;  
1831 - }  
1832 - setLastObjectDescription(description, exp_og);  
1833 -  
1834 - if (!m->attempt_recovery) {  
1835 - try_recovery = false;  
1836 - }  
1837 -  
1838 - // Special case: if offset is 0, just return null. Some PDF writers, in particular  
1839 - // "Mac OS X 10.7.5 Quartz PDFContext", may store deleted objects in the xref table as  
1840 - // "0000000000 00000 n", which is not correct, but it won't hurt anything for us to ignore  
1841 - // these.  
1842 - if (offset == 0) {  
1843 - QTC::TC("qpdf", "QPDF bogus 0 offset", 0);  
1844 - warn(damagedPDF(0, "object has offset 0"));  
1845 - return QPDFObjectHandle::newNull();  
1846 - }  
1847 -  
1848 - m->file->seek(offset, SEEK_SET);  
1849 - try {  
1850 - QPDFTokenizer::Token tobjid = readToken(*m->file);  
1851 - bool objidok = tobjid.isInteger();  
1852 - QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0);  
1853 - if (!objidok) {  
1854 - QTC::TC("qpdf", "QPDF expected n n obj");  
1855 - throw damagedPDF(offset, "expected n n obj");  
1856 - }  
1857 - QPDFTokenizer::Token tgen = readToken(*m->file);  
1858 - bool genok = tgen.isInteger();  
1859 - QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0);  
1860 - if (!genok) {  
1861 - throw damagedPDF(offset, "expected n n obj");  
1862 - }  
1863 - QPDFTokenizer::Token tobj = readToken(*m->file);  
1864 -  
1865 - bool objok = tobj.isWord("obj");  
1866 - QTC::TC("qpdf", "QPDF check obj", objok ? 1 : 0);  
1867 -  
1868 - if (!objok) {  
1869 - throw damagedPDF(offset, "expected n n obj");  
1870 - }  
1871 - int objid = QUtil::string_to_int(tobjid.getValue().c_str());  
1872 - int generation = QUtil::string_to_int(tgen.getValue().c_str());  
1873 - og = QPDFObjGen(objid, generation);  
1874 - if (objid == 0) {  
1875 - QTC::TC("qpdf", "QPDF object id 0");  
1876 - throw damagedPDF(offset, "object with ID 0");  
1877 - }  
1878 - if (check_og && (exp_og != og)) {  
1879 - QTC::TC("qpdf", "QPDF err wrong objid/generation");  
1880 - QPDFExc e = damagedPDF(offset, "expected " + exp_og.unparse(' ') + " obj");  
1881 - if (try_recovery) {  
1882 - // Will be retried below  
1883 - throw e;  
1884 - } else {  
1885 - // We can try reading the object anyway even if the ID doesn't match.  
1886 - warn(e);  
1887 - }  
1888 - }  
1889 - } catch (QPDFExc& e) {  
1890 - if (try_recovery) {  
1891 - // Try again after reconstructing xref table  
1892 - m->xref_table.reconstruct(e);  
1893 - if (m->xref_table.type(exp_og) == 1) {  
1894 - QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");  
1895 - return readObjectAtOffset(  
1896 - false, m->xref_table.offset(exp_og), description, exp_og, og, false);  
1897 - } else {  
1898 - QTC::TC("qpdf", "QPDF object gone after xref reconstruction");  
1899 - warn(damagedPDF(  
1900 - "",  
1901 - 0,  
1902 - ("object " + exp_og.unparse(' ') +  
1903 - " not found in file after regenerating cross reference table")));  
1904 - return QPDFObjectHandle::newNull();  
1905 - }  
1906 - } else {  
1907 - throw;  
1908 - }  
1909 - }  
1910 -  
1911 - QPDFObjectHandle oh = readObject(description, og);  
1912 -  
1913 - if (isUnresolved(og)) {  
1914 - // Store the object in the cache here so it gets cached whether we first know the offset or  
1915 - // whether we first know the object ID and generation (in which case we would get here  
1916 - // through resolve).  
1917 -  
1918 - // Determine the end offset of this object before and after white space. We use these  
1919 - // numbers to validate linearization hint tables. Offsets and lengths of objects may imply  
1920 - // the end of an object to be anywhere between these values.  
1921 - qpdf_offset_t end_before_space = m->file->tell();  
1922 -  
1923 - // skip over spaces  
1924 - while (true) {  
1925 - char ch;  
1926 - if (m->file->read(&ch, 1)) {  
1927 - if (!isspace(static_cast<unsigned char>(ch))) {  
1928 - m->file->seek(-1, SEEK_CUR);  
1929 - break;  
1930 - }  
1931 - } else {  
1932 - throw damagedPDF(m->file->tell(), "EOF after endobj");  
1933 - }  
1934 - }  
1935 - qpdf_offset_t end_after_space = m->file->tell();  
1936 - if (skip_cache_if_in_xref && m->xref_table.type(og)) {  
1937 - // Ordinarily, an object gets read here when resolved through xref table or stream. In  
1938 - // the special case of the xref stream and linearization hint tables, the offset comes  
1939 - // from another source. For the specific case of xref streams, the xref stream is read  
1940 - // and loaded into the object cache very early in parsing. Ordinarily, when a file is  
1941 - // updated by appending, items inserted into the xref table in later updates take  
1942 - // precedence over earlier items. In the special case of reusing the object number  
1943 - // previously used as the xref stream, we have the following order of events:  
1944 - //  
1945 - // * reused object gets loaded into the xref table  
1946 - // * old object is read here while reading xref streams  
1947 - // * original xref entry is ignored (since already in xref table)  
1948 - //  
1949 - // It is the second step that causes a problem. Even though the xref table is correct in  
1950 - // this case, the old object is already in the cache and so effectively prevails over  
1951 - // the reused object. To work around this issue, we have a special case for the xref  
1952 - // stream (via the skip_cache_if_in_xref): if the object is already in the xref stream,  
1953 - // don't cache what we read here.  
1954 - //  
1955 - // It is likely that the same bug may exist for linearization hint tables, but the  
1956 - // existing code uses end_before_space and end_after_space from the cache, so fixing  
1957 - // that would require more significant rework. The chances of a linearization hint  
1958 - // stream being reused seems smaller because the xref stream is probably the highest  
1959 - // object in the file and the linearization hint stream would be some random place in  
1960 - // the middle, so I'm leaving that bug unfixed for now. If the bug were to be fixed, we  
1961 - // could use !check_og in place of skip_cache_if_in_xref.  
1962 - QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");  
1963 - } else {  
1964 - m->xref_table.linearization_offsets(  
1965 - toS(og.getObj()), end_before_space, end_after_space);  
1966 - updateCache(og, oh.getObj());  
1967 - }  
1968 - }  
1969 -  
1970 - return oh;  
1971 -}  
1972 -  
1973 -QPDFObject*  
1974 -QPDF::resolve(QPDFObjGen og)  
1975 -{  
1976 - if (!isUnresolved(og)) {  
1977 - return m->obj_cache[og].object.get();  
1978 - }  
1979 -  
1980 - if (m->resolving.count(og)) {  
1981 - // This can happen if an object references itself directly or indirectly in some key that  
1982 - // has to be resolved during object parsing, such as stream length.  
1983 - QTC::TC("qpdf", "QPDF recursion loop in resolve");  
1984 - warn(damagedPDF("", "loop detected resolving object " + og.unparse(' ')));  
1985 - updateCache(og, QPDF_Null::create());  
1986 - return m->obj_cache[og].object.get();  
1987 - }  
1988 - ResolveRecorder rr(this, og);  
1989 -  
1990 - try {  
1991 - switch (m->xref_table.type(og)) {  
1992 - case 0:  
1993 - break;  
1994 - case 1:  
1995 - {  
1996 - // Object stored in cache by readObjectAtOffset  
1997 - QPDFObjGen a_og;  
1998 - QPDFObjectHandle oh =  
1999 - readObjectAtOffset(true, m->xref_table.offset(og), "", og, a_og, false);  
2000 - }  
2001 - break;  
2002 -  
2003 - case 2:  
2004 - resolveObjectsInStream(m->xref_table.stream_number(og.getObj()));  
2005 - break;  
2006 -  
2007 - default:  
2008 - throw damagedPDF(  
2009 - "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));  
2010 - }  
2011 - } catch (QPDFExc& e) {  
2012 - warn(e);  
2013 - } catch (std::exception& e) {  
2014 - warn(damagedPDF(  
2015 - "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));  
2016 - }  
2017 -  
2018 - if (isUnresolved(og)) {  
2019 - // PDF spec says unknown objects resolve to the null object.  
2020 - QTC::TC("qpdf", "QPDF resolve failure to null");  
2021 - updateCache(og, QPDF_Null::create());  
2022 - }  
2023 -  
2024 - auto result(m->obj_cache[og].object);  
2025 - result->setDefaultDescription(this, og);  
2026 - return result.get();  
2027 -}  
2028 -  
2029 -void  
2030 -QPDF::resolveObjectsInStream(int obj_stream_number)  
2031 -{  
2032 - if (m->resolved_object_streams.count(obj_stream_number)) {  
2033 - return;  
2034 - }  
2035 - m->resolved_object_streams.insert(obj_stream_number);  
2036 - // Force resolution of object stream  
2037 - QPDFObjectHandle obj_stream = getObjectByID(obj_stream_number, 0);  
2038 - if (!obj_stream.isStream()) {  
2039 - throw damagedPDF(  
2040 - "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream");  
2041 - }  
2042 -  
2043 - QPDFObjectHandle dict = obj_stream.getDict();  
2044 - if (!dict.isDictionaryOfType("/ObjStm")) {  
2045 - QTC::TC("qpdf", "QPDF ERR object stream with wrong type");  
2046 - warn(damagedPDF(  
2047 - "supposed object stream " + std::to_string(obj_stream_number) + " has wrong type"));  
2048 - }  
2049 -  
2050 - if (!(dict.getKey("/N").isInteger() && dict.getKey("/First").isInteger())) {  
2051 - throw damagedPDF(  
2052 - ("object stream " + std::to_string(obj_stream_number) + " has incorrect keys"));  
2053 - }  
2054 -  
2055 - int n = dict.getKey("/N").getIntValueAsInt();  
2056 - int first = dict.getKey("/First").getIntValueAsInt();  
2057 -  
2058 - std::map<int, int> offsets;  
2059 -  
2060 - std::shared_ptr<Buffer> bp = obj_stream.getStreamData(qpdf_dl_specialized);  
2061 - auto input = std::shared_ptr<InputSource>(  
2062 - // line-break  
2063 - new BufferInputSource(  
2064 - (m->file->getName() + " object stream " + std::to_string(obj_stream_number)),  
2065 - bp.get()));  
2066 -  
2067 - qpdf_offset_t last_offset = -1;  
2068 - for (int i = 0; i < n; ++i) {  
2069 - QPDFTokenizer::Token tnum = readToken(*input);  
2070 - QPDFTokenizer::Token toffset = readToken(*input);  
2071 - if (!(tnum.isInteger() && toffset.isInteger())) {  
2072 - throw damagedPDF(  
2073 - *input,  
2074 - m->last_object_description,  
2075 - input->getLastOffset(),  
2076 - "expected integer in object stream header");  
2077 - }  
2078 -  
2079 - int num = QUtil::string_to_int(tnum.getValue().c_str());  
2080 - long long offset = QUtil::string_to_int(toffset.getValue().c_str());  
2081 - if (num > m->xref_table.max_id()) {  
2082 - continue;  
2083 - }  
2084 - if (num == obj_stream_number) {  
2085 - QTC::TC("qpdf", "QPDF ignore self-referential object stream");  
2086 - warn(damagedPDF(  
2087 - *input,  
2088 - m->last_object_description,  
2089 - input->getLastOffset(),  
2090 - "object stream claims to contain itself"));  
2091 - continue;  
2092 - }  
2093 - if (offset <= last_offset) {  
2094 - throw damagedPDF(  
2095 - *input,  
2096 - m->last_object_description,  
2097 - input->getLastOffset(),  
2098 - "expected offsets in object stream to be increasing");  
2099 - }  
2100 - last_offset = offset;  
2101 -  
2102 - offsets[num] = toI(offset + first);  
2103 - }  
2104 -  
2105 - // To avoid having to read the object stream multiple times, store all objects that would be  
2106 - // found here in the cache. Remember that some objects stored here might have been overridden  
2107 - // by new objects appended to the file, so it is necessary to recheck the xref table and only  
2108 - // cache what would actually be resolved here.  
2109 - m->last_object_description.clear();  
2110 - m->last_object_description += "object ";  
2111 - for (auto const& iter: offsets) {  
2112 - QPDFObjGen og(iter.first, 0);  
2113 - if (m->xref_table.type(og) == 2 &&  
2114 - m->xref_table.stream_number(og.getObj()) == obj_stream_number) {  
2115 - int offset = iter.second;  
2116 - input->seek(offset, SEEK_SET);  
2117 - QPDFObjectHandle oh = readObjectInStream(input, iter.first);  
2118 - updateCache(og, oh.getObj());  
2119 - } else {  
2120 - QTC::TC("qpdf", "QPDF not caching overridden objstm object");  
2121 - }  
2122 - }  
2123 -}  
2124 -  
2125 -QPDFObjectHandle  
2126 QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& obj) 525 QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& obj)
2127 { 526 {
2128 obj->setDefaultDescription(this, og); 527 obj->setDefaultDescription(this, og);
2129 return {obj}; 528 return {obj};
2130 } 529 }
2131 530
2132 -void  
2133 -QPDF::updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object)  
2134 -{  
2135 - object->setObjGen(this, og);  
2136 - if (isCached(og)) {  
2137 - auto& cache = m->obj_cache[og];  
2138 - cache.object->assign(object);  
2139 - } else {  
2140 - m->obj_cache[og] = ObjCache(object);  
2141 - }  
2142 -}  
2143 -  
2144 -bool  
2145 -QPDF::isCached(QPDFObjGen const& og)  
2146 -{  
2147 - return m->obj_cache.count(og) != 0;  
2148 -}  
2149 -  
2150 -bool  
2151 -QPDF::isUnresolved(QPDFObjGen const& og)  
2152 -{  
2153 - return !isCached(og) || m->obj_cache[og].object->isUnresolved();  
2154 -}  
2155 -  
2156 -QPDFObjGen  
2157 -QPDF::nextObjGen()  
2158 -{  
2159 - int max_objid = toI(getObjectCount());  
2160 - if (max_objid == std::numeric_limits<int>::max()) {  
2161 - throw std::range_error("max object id is too high to create new objects");  
2162 - }  
2163 - return QPDFObjGen(max_objid + 1, 0);  
2164 -}  
2165 -  
2166 -QPDFObjectHandle  
2167 -QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj)  
2168 -{  
2169 - QPDFObjGen next{nextObjGen()};  
2170 - m->obj_cache[next] = ObjCache(obj);  
2171 - return newIndirect(next, m->obj_cache[next].object);  
2172 -}  
2173 -  
2174 QPDFObjectHandle 531 QPDFObjectHandle
2175 QPDF::makeIndirectObject(QPDFObjectHandle oh) 532 QPDF::makeIndirectObject(QPDFObjectHandle oh)
2176 { 533 {
@@ -2215,37 +572,6 @@ QPDF::newStream(std::string const& data) @@ -2215,37 +572,6 @@ QPDF::newStream(std::string const& data)
2215 return result; 572 return result;
2216 } 573 }
2217 574
2218 -std::shared_ptr<QPDFObject>  
2219 -QPDF::getObjectForParser(int id, int gen, bool parse_pdf)  
2220 -{  
2221 - // This method is called by the parser and therefore must not resolve any objects.  
2222 - auto og = QPDFObjGen(id, gen);  
2223 - if (auto iter = m->obj_cache.find(og); iter != m->obj_cache.end()) {  
2224 - return iter->second.object;  
2225 - }  
2226 - if (m->xref_table.type(og) || !m->xref_table.initialized()) {  
2227 - return m->obj_cache.insert({og, QPDF_Unresolved::create(this, og)}).first->second.object;  
2228 - }  
2229 - if (parse_pdf) {  
2230 - return QPDF_Null::create();  
2231 - }  
2232 - return m->obj_cache.insert({og, QPDF_Null::create(this, og)}).first->second.object;  
2233 -}  
2234 -  
2235 -std::shared_ptr<QPDFObject>  
2236 -QPDF::getObjectForJSON(int id, int gen)  
2237 -{  
2238 - auto og = QPDFObjGen(id, gen);  
2239 - auto [it, inserted] = m->obj_cache.try_emplace(og);  
2240 - auto& obj = it->second.object;  
2241 - if (inserted) {  
2242 - obj = (m->xref_table.initialized() && !m->xref_table.type(og))  
2243 - ? QPDF_Null::create(this, og)  
2244 - : QPDF_Unresolved::create(this, og);  
2245 - }  
2246 - return obj;  
2247 -}  
2248 -  
2249 QPDFObjectHandle 575 QPDFObjectHandle
2250 QPDF::getObject(QPDFObjGen const& og) 576 QPDF::getObject(QPDFObjGen const& og)
2251 { 577 {
@@ -2284,27 +610,6 @@ QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh) @@ -2284,27 +610,6 @@ QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh)
2284 } 610 }
2285 611
2286 void 612 void
2287 -QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)  
2288 -{  
2289 - if (!oh || (oh.isIndirect() && !(oh.isStream() && oh.getObjGen() == og))) {  
2290 - QTC::TC("qpdf", "QPDF replaceObject called with indirect object");  
2291 - throw std::logic_error("QPDF::replaceObject called with indirect object handle");  
2292 - }  
2293 - updateCache(og, oh.getObj());  
2294 -}  
2295 -  
2296 -void  
2297 -QPDF::removeObject(QPDFObjGen og)  
2298 -{  
2299 - if (auto cached = m->obj_cache.find(og); cached != m->obj_cache.end()) {  
2300 - // Take care of any object handles that may be floating around.  
2301 - cached->second.object->assign(QPDF_Null::create());  
2302 - cached->second.object->setObjGen(nullptr, QPDFObjGen());  
2303 - m->obj_cache.erase(cached);  
2304 - }  
2305 -}  
2306 -  
2307 -void  
2308 QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement) 613 QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement)
2309 { 614 {
2310 QTC::TC("qpdf", "QPDF replaceReserved"); 615 QTC::TC("qpdf", "QPDF replaceReserved");
@@ -2581,15 +886,6 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2) @@ -2581,15 +886,6 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
2581 swapObjects(QPDFObjGen(objid1, generation1), QPDFObjGen(objid2, generation2)); 886 swapObjects(QPDFObjGen(objid1, generation1), QPDFObjGen(objid2, generation2));
2582 } 887 }
2583 888
2584 -void  
2585 -QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)  
2586 -{  
2587 - // Force objects to be read from the input source if needed, then swap them in the cache.  
2588 - resolve(og1);  
2589 - resolve(og2);  
2590 - m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object);  
2591 -}  
2592 -  
2593 unsigned long long 889 unsigned long long
2594 QPDF::getUniqueId() const 890 QPDF::getUniqueId() const
2595 { 891 {
@@ -2676,137 +972,6 @@ QPDF::getXRefTable() @@ -2676,137 +972,6 @@ QPDF::getXRefTable()
2676 return m->xref_table.as_map(); 972 return m->xref_table.as_map();
2677 } 973 }
2678 974
2679 -size_t  
2680 -QPDF::tableSize()  
2681 -{  
2682 - // If obj_cache is dense, accommodate all object in tables,else accommodate only original  
2683 - // objects.  
2684 - auto max_xref = toI(m->xref_table.size());  
2685 - if (max_xref > 0) {  
2686 - --max_xref;  
2687 - }  
2688 - auto max_obj = m->obj_cache.size() ? m->obj_cache.crbegin()->first.getObj() : 0;  
2689 - auto max_id = std::numeric_limits<int>::max() - 1;  
2690 - if (max_obj >= max_id || max_xref >= max_id) {  
2691 - // Temporary fix. Long-term solution is  
2692 - // - QPDFObjGen to enforce objgens are valid and sensible  
2693 - // - xref table and obj cache to protect against insertion of impossibly large obj ids  
2694 - stopOnError("Impossibly large object id encountered.");  
2695 - }  
2696 - if (max_obj < 1.1 * std::max(toI(m->obj_cache.size()), max_xref)) {  
2697 - return toS(++max_obj);  
2698 - }  
2699 - return toS(++max_xref);  
2700 -}  
2701 -  
2702 -std::vector<QPDFObjGen>  
2703 -QPDF::getCompressibleObjVector()  
2704 -{  
2705 - return getCompressibleObjGens<QPDFObjGen>();  
2706 -}  
2707 -  
2708 -std::vector<bool>  
2709 -QPDF::getCompressibleObjSet()  
2710 -{  
2711 - return getCompressibleObjGens<bool>();  
2712 -}  
2713 -  
2714 -template <typename T>  
2715 -std::vector<T>  
2716 -QPDF::getCompressibleObjGens()  
2717 -{  
2718 - // Return a list of objects that are allowed to be in object streams. Walk through the objects  
2719 - // by traversing the document from the root, including a traversal of the pages tree. This  
2720 - // makes that objects that are on the same page are more likely to be in the same object stream,  
2721 - // which is slightly more efficient, particularly with linearized files. This is better than  
2722 - // iterating through the xref table since it avoids preserving orphaned items.  
2723 -  
2724 - // Exclude encryption dictionary, if any  
2725 - QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");  
2726 - QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();  
2727 -  
2728 - const size_t max_obj = getObjectCount();  
2729 - std::vector<bool> visited(max_obj, false);  
2730 - std::vector<QPDFObjectHandle> queue;  
2731 - queue.reserve(512);  
2732 - queue.push_back(m->xref_table.trailer());  
2733 - std::vector<T> result;  
2734 - if constexpr (std::is_same_v<T, QPDFObjGen>) {  
2735 - result.reserve(m->obj_cache.size());  
2736 - } else if constexpr (std::is_same_v<T, bool>) {  
2737 - result.resize(max_obj + 1U, false);  
2738 - } else {  
2739 - throw std::logic_error("Unsupported type in QPDF::getCompressibleObjGens");  
2740 - }  
2741 - while (!queue.empty()) {  
2742 - auto obj = queue.back();  
2743 - queue.pop_back();  
2744 - if (obj.getObjectID() > 0) {  
2745 - QPDFObjGen og = obj.getObjGen();  
2746 - const size_t id = toS(og.getObj() - 1);  
2747 - if (id >= max_obj) {  
2748 - throw std::logic_error(  
2749 - "unexpected object id encountered in getCompressibleObjGens");  
2750 - }  
2751 - if (visited[id]) {  
2752 - QTC::TC("qpdf", "QPDF loop detected traversing objects");  
2753 - continue;  
2754 - }  
2755 -  
2756 - // Check whether this is the current object. If not, remove it (which changes it into a  
2757 - // direct null and therefore stops us from revisiting it) and move on to the next object  
2758 - // in the queue.  
2759 - auto upper = m->obj_cache.upper_bound(og);  
2760 - if (upper != m->obj_cache.end() && upper->first.getObj() == og.getObj()) {  
2761 - removeObject(og);  
2762 - continue;  
2763 - }  
2764 -  
2765 - visited[id] = true;  
2766 -  
2767 - if (og == encryption_dict_og) {  
2768 - QTC::TC("qpdf", "QPDF exclude encryption dictionary");  
2769 - } else if (!(obj.isStream() ||  
2770 - (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") &&  
2771 - obj.hasKey("/Contents")))) {  
2772 - if constexpr (std::is_same_v<T, QPDFObjGen>) {  
2773 - result.push_back(og);  
2774 - } else if constexpr (std::is_same_v<T, bool>) {  
2775 - result[id + 1U] = true;  
2776 - }  
2777 - }  
2778 - }  
2779 - if (obj.isStream()) {  
2780 - QPDFObjectHandle dict = obj.getDict();  
2781 - std::set<std::string> keys = dict.getKeys();  
2782 - for (auto iter = keys.rbegin(); iter != keys.rend(); ++iter) {  
2783 - std::string const& key = *iter;  
2784 - QPDFObjectHandle value = dict.getKey(key);  
2785 - if (key == "/Length") {  
2786 - // omit stream lengths  
2787 - if (value.isIndirect()) {  
2788 - QTC::TC("qpdf", "QPDF exclude indirect length");  
2789 - }  
2790 - } else {  
2791 - queue.push_back(value);  
2792 - }  
2793 - }  
2794 - } else if (obj.isDictionary()) {  
2795 - std::set<std::string> keys = obj.getKeys();  
2796 - for (auto iter = keys.rbegin(); iter != keys.rend(); ++iter) {  
2797 - queue.push_back(obj.getKey(*iter));  
2798 - }  
2799 - } else if (obj.isArray()) {  
2800 - int n = obj.getArrayNItems();  
2801 - for (int i = 1; i <= n; ++i) {  
2802 - queue.push_back(obj.getArrayItem(n - i));  
2803 - }  
2804 - }  
2805 - }  
2806 -  
2807 - return result;  
2808 -}  
2809 -  
2810 bool 975 bool
2811 QPDF::pipeStreamData( 976 QPDF::pipeStreamData(
2812 std::shared_ptr<EncryptionParameters> encp, 977 std::shared_ptr<EncryptionParameters> encp,
libqpdf/QPDF_objects.cc
@@ -3,16 +3,12 @@ @@ -3,16 +3,12 @@
3 #include <qpdf/QPDF_private.hh> 3 #include <qpdf/QPDF_private.hh>
4 4
5 #include <array> 5 #include <array>
6 -#include <atomic>  
7 #include <cstring> 6 #include <cstring>
8 #include <limits> 7 #include <limits>
9 #include <map> 8 #include <map>
10 -#include <regex>  
11 -#include <sstream>  
12 #include <vector> 9 #include <vector>
13 10
14 #include <qpdf/BufferInputSource.hh> 11 #include <qpdf/BufferInputSource.hh>
15 -#include <qpdf/FileInputSource.hh>  
16 #include <qpdf/OffsetInputSource.hh> 12 #include <qpdf/OffsetInputSource.hh>
17 #include <qpdf/Pipeline.hh> 13 #include <qpdf/Pipeline.hh>
18 #include <qpdf/QPDFExc.hh> 14 #include <qpdf/QPDFExc.hh>
@@ -28,10 +24,6 @@ @@ -28,10 +24,6 @@
28 #include <qpdf/QTC.hh> 24 #include <qpdf/QTC.hh>
29 #include <qpdf/QUtil.hh> 25 #include <qpdf/QUtil.hh>
30 26
31 -// This must be a fixed value. This API returns a const reference to it, and the C API relies on its  
32 -// being static as well.  
33 -std::string const QPDF::qpdf_version(QPDF_VERSION);  
34 -  
35 namespace 27 namespace
36 { 28 {
37 class InvalidInputSource final: public InputSource 29 class InvalidInputSource final: public InputSource
@@ -94,321 +86,6 @@ namespace @@ -94,321 +86,6 @@ namespace
94 }; 86 };
95 } // namespace 87 } // namespace
96 88
97 -QPDF::ForeignStreamData::ForeignStreamData(  
98 - std::shared_ptr<EncryptionParameters> encp,  
99 - std::shared_ptr<InputSource> file,  
100 - QPDFObjGen const& foreign_og,  
101 - qpdf_offset_t offset,  
102 - size_t length,  
103 - QPDFObjectHandle local_dict) :  
104 - encp(encp),  
105 - file(file),  
106 - foreign_og(foreign_og),  
107 - offset(offset),  
108 - length(length),  
109 - local_dict(local_dict)  
110 -{  
111 -}  
112 -  
113 -QPDF::CopiedStreamDataProvider::CopiedStreamDataProvider(QPDF& destination_qpdf) :  
114 - QPDFObjectHandle::StreamDataProvider(true),  
115 - destination_qpdf(destination_qpdf)  
116 -{  
117 -}  
118 -  
119 -bool  
120 -QPDF::CopiedStreamDataProvider::provideStreamData(  
121 - QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry)  
122 -{  
123 - std::shared_ptr<ForeignStreamData> foreign_data = foreign_stream_data[og];  
124 - bool result = false;  
125 - if (foreign_data.get()) {  
126 - result = destination_qpdf.pipeForeignStreamData(  
127 - foreign_data, pipeline, suppress_warnings, will_retry);  
128 - QTC::TC("qpdf", "QPDF copy foreign with data", result ? 0 : 1);  
129 - } else {  
130 - auto foreign_stream = foreign_streams[og];  
131 - result = foreign_stream.pipeStreamData(  
132 - pipeline, nullptr, 0, qpdf_dl_none, suppress_warnings, will_retry);  
133 - QTC::TC("qpdf", "QPDF copy foreign with foreign_stream", result ? 0 : 1);  
134 - }  
135 - return result;  
136 -}  
137 -  
138 -void  
139 -QPDF::CopiedStreamDataProvider::registerForeignStream(  
140 - QPDFObjGen const& local_og, QPDFObjectHandle foreign_stream)  
141 -{  
142 - this->foreign_streams[local_og] = foreign_stream;  
143 -}  
144 -  
145 -void  
146 -QPDF::CopiedStreamDataProvider::registerForeignStream(  
147 - QPDFObjGen const& local_og, std::shared_ptr<ForeignStreamData> foreign_stream)  
148 -{  
149 - this->foreign_stream_data[local_og] = foreign_stream;  
150 -}  
151 -  
152 -QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, QPDFObjGen const& og) :  
153 - qpdf(qpdf),  
154 - og(og)  
155 -{  
156 -}  
157 -  
158 -void  
159 -QPDF::StringDecrypter::decryptString(std::string& val)  
160 -{  
161 - qpdf->decryptString(val, og);  
162 -}  
163 -  
164 -std::string const&  
165 -QPDF::QPDFVersion()  
166 -{  
167 - // The C API relies on this being a static value.  
168 - return QPDF::qpdf_version;  
169 -}  
170 -  
171 -QPDF::EncryptionParameters::EncryptionParameters() :  
172 - encrypted(false),  
173 - encryption_initialized(false),  
174 - encryption_V(0),  
175 - encryption_R(0),  
176 - encrypt_metadata(true),  
177 - cf_stream(e_none),  
178 - cf_string(e_none),  
179 - cf_file(e_none),  
180 - user_password_matched(false),  
181 - owner_password_matched(false)  
182 -{  
183 -}  
184 -  
185 -QPDF::Members::Members(QPDF& qpdf) :  
186 - log(QPDFLogger::defaultLogger()),  
187 - file_sp(new InvalidInputSource(no_input_name)),  
188 - file(file_sp.get()),  
189 - encp(new EncryptionParameters),  
190 - xref_table(qpdf, file)  
191 -{  
192 -}  
193 -  
194 -QPDF::QPDF() :  
195 - m(new Members(*this))  
196 -{  
197 - m->tokenizer.allowEOF();  
198 - // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout  
199 - // the lifetime of this running application.  
200 - static std::atomic<unsigned long long> unique_id{0};  
201 - m->unique_id = unique_id.fetch_add(1ULL);  
202 -}  
203 -  
204 -QPDF::~QPDF()  
205 -{  
206 - // If two objects are mutually referential (through each object having an array or dictionary  
207 - // that contains an indirect reference to the other), the circular references in the  
208 - // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects  
209 - // in the object cache, which is those objects that we read from the file, and break all  
210 - // resolved indirect references by replacing them with an internal object type representing that  
211 - // they have been destroyed. Note that we can't break references like this at any time when the  
212 - // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that  
213 - // are reachable from this object to release their association with this QPDF. Direct objects  
214 - // are not destroyed since they can be moved to other QPDF objects safely.  
215 -  
216 - for (auto const& iter: m->obj_cache) {  
217 - iter.second.object->disconnect();  
218 - if (iter.second.object->getTypeCode() != ::ot_null) {  
219 - iter.second.object->destroy();  
220 - }  
221 - }  
222 -}  
223 -  
224 -std::shared_ptr<QPDF>  
225 -QPDF::create()  
226 -{  
227 - return std::make_shared<QPDF>();  
228 -}  
229 -  
230 -void  
231 -QPDF::processFile(char const* filename, char const* password)  
232 -{  
233 - auto* fi = new FileInputSource(filename);  
234 - processInputSource(std::shared_ptr<InputSource>(fi), password);  
235 -}  
236 -  
237 -void  
238 -QPDF::processFile(char const* description, FILE* filep, bool close_file, char const* password)  
239 -{  
240 - auto* fi = new FileInputSource(description, filep, close_file);  
241 - processInputSource(std::shared_ptr<InputSource>(fi), password);  
242 -}  
243 -  
244 -void  
245 -QPDF::processMemoryFile(  
246 - char const* description, char const* buf, size_t length, char const* password)  
247 -{  
248 - processInputSource(  
249 - std::shared_ptr<InputSource>(  
250 - // line-break  
251 - new BufferInputSource(  
252 - description, new Buffer(QUtil::unsigned_char_pointer(buf), length), true)),  
253 - password);  
254 -}  
255 -  
256 -void  
257 -QPDF::processInputSource(std::shared_ptr<InputSource> source, char const* password)  
258 -{  
259 - m->file_sp = source;  
260 - m->file = source.get();  
261 - parse(password);  
262 -}  
263 -  
264 -void  
265 -QPDF::closeInputSource()  
266 -{  
267 - m->no_input_name = "closed input source";  
268 - m->file_sp = std::shared_ptr<InputSource>(new InvalidInputSource(m->no_input_name));  
269 - m->file = m->file_sp.get();  
270 -}  
271 -  
272 -void  
273 -QPDF::setPasswordIsHexKey(bool val)  
274 -{  
275 - m->provided_password_is_hex_key = val;  
276 -}  
277 -  
278 -void  
279 -QPDF::emptyPDF()  
280 -{  
281 - m->pdf_version = "1.3";  
282 - m->no_input_name = "empty PDF";  
283 - m->xref_table.initialize_empty();  
284 -}  
285 -  
286 -void  
287 -QPDF::registerStreamFilter(  
288 - std::string const& filter_name, std::function<std::shared_ptr<QPDFStreamFilter>()> factory)  
289 -{  
290 - QPDF_Stream::registerStreamFilter(filter_name, factory);  
291 -}  
292 -  
293 -void  
294 -QPDF::setIgnoreXRefStreams(bool val)  
295 -{  
296 - m->xref_table.ignore_streams(val);  
297 -}  
298 -  
299 -std::shared_ptr<QPDFLogger>  
300 -QPDF::getLogger()  
301 -{  
302 - return m->log;  
303 -}  
304 -  
305 -void  
306 -QPDF::setLogger(std::shared_ptr<QPDFLogger> l)  
307 -{  
308 - m->log = l;  
309 -}  
310 -  
311 -void  
312 -QPDF::setOutputStreams(std::ostream* out, std::ostream* err)  
313 -{  
314 - setLogger(QPDFLogger::create());  
315 - m->log->setOutputStreams(out, err);  
316 -}  
317 -  
318 -void  
319 -QPDF::setSuppressWarnings(bool val)  
320 -{  
321 - m->suppress_warnings = val;  
322 -}  
323 -  
324 -void  
325 -QPDF::setMaxWarnings(size_t val)  
326 -{  
327 - m->max_warnings = val;  
328 -}  
329 -  
330 -void  
331 -QPDF::setAttemptRecovery(bool val)  
332 -{  
333 - m->attempt_recovery = val;  
334 - m->xref_table.attempt_recovery(val);  
335 -}  
336 -  
337 -void  
338 -QPDF::setImmediateCopyFrom(bool val)  
339 -{  
340 - m->immediate_copy_from = val;  
341 -}  
342 -  
343 -std::vector<QPDFExc>  
344 -QPDF::getWarnings()  
345 -{  
346 - std::vector<QPDFExc> result = m->warnings;  
347 - m->warnings.clear();  
348 - return result;  
349 -}  
350 -  
351 -bool  
352 -QPDF::anyWarnings() const  
353 -{  
354 - return !m->warnings.empty();  
355 -}  
356 -  
357 -size_t  
358 -QPDF::numWarnings() const  
359 -{  
360 - return m->warnings.size();  
361 -}  
362 -  
363 -bool  
364 -QPDF::validatePDFVersion(char const*& p, std::string& version)  
365 -{  
366 - bool valid = QUtil::is_digit(*p);  
367 - if (valid) {  
368 - while (QUtil::is_digit(*p)) {  
369 - version.append(1, *p++);  
370 - }  
371 - if ((*p == '.') && QUtil::is_digit(*(p + 1))) {  
372 - version.append(1, *p++);  
373 - while (QUtil::is_digit(*p)) {  
374 - version.append(1, *p++);  
375 - }  
376 - } else {  
377 - valid = false;  
378 - }  
379 - }  
380 - return valid;  
381 -}  
382 -  
383 -bool  
384 -QPDF::findHeader()  
385 -{  
386 - qpdf_offset_t global_offset = m->file->tell();  
387 - std::string line = m->file->readLine(1024);  
388 - char const* p = line.c_str();  
389 - if (strncmp(p, "%PDF-", 5) != 0) {  
390 - throw std::logic_error("findHeader is not looking at %PDF-");  
391 - }  
392 - p += 5;  
393 - std::string version;  
394 - // Note: The string returned by line.c_str() is always null-terminated. The code below never  
395 - // overruns the buffer because a null character always short-circuits further advancement.  
396 - bool valid = validatePDFVersion(p, version);  
397 - if (valid) {  
398 - m->pdf_version = version;  
399 - if (global_offset != 0) {  
400 - // Empirical evidence strongly suggests that when there is leading material prior to the  
401 - // PDF header, all explicit offsets in the file are such that 0 points to the beginning  
402 - // of the header.  
403 - QTC::TC("qpdf", "QPDF global offset");  
404 - m->file_sp =  
405 - std::shared_ptr<InputSource>(new OffsetInputSource(m->file_sp, global_offset));  
406 - m->file = m->file_sp.get();  
407 - }  
408 - }  
409 - return valid;  
410 -}  
411 -  
412 bool 89 bool
413 QPDF::findStartxref() 90 QPDF::findStartxref()
414 { 91 {
@@ -421,64 +98,6 @@ QPDF::findStartxref() @@ -421,64 +98,6 @@ QPDF::findStartxref()
421 } 98 }
422 99
423 void 100 void
424 -QPDF::parse(char const* password)  
425 -{  
426 - if (password) {  
427 - m->encp->provided_password = password;  
428 - }  
429 -  
430 - // Find the header anywhere in the first 1024 bytes of the file.  
431 - PatternFinder hf(*this, &QPDF::findHeader);  
432 - if (!m->file->findFirst("%PDF-", 0, 1024, hf)) {  
433 - QTC::TC("qpdf", "QPDF not a pdf file");  
434 - warn(damagedPDF("", 0, "can't find PDF header"));  
435 - // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode  
436 - m->pdf_version = "1.2";  
437 - }  
438 -  
439 - m->xref_table.initialize();  
440 - initializeEncryption();  
441 - if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {  
442 - // QPDFs created from JSON have an empty xref table and no root object yet.  
443 - throw damagedPDF("", 0, "unable to find page tree");  
444 - }  
445 -}  
446 -  
447 -void  
448 -QPDF::inParse(bool v)  
449 -{  
450 - if (m->in_parse == v) {  
451 - // This happens if QPDFParser::parse tries to resolve an indirect object while it is  
452 - // parsing.  
453 - throw std::logic_error("QPDF: re-entrant parsing detected. This is a qpdf bug."  
454 - " Please report at https://github.com/qpdf/qpdf/issues.");  
455 - }  
456 - m->in_parse = v;  
457 -}  
458 -  
459 -void  
460 -QPDF::warn(QPDFExc const& e)  
461 -{  
462 - if (m->max_warnings > 0 && m->warnings.size() >= m->max_warnings) {  
463 - stopOnError("Too many warnings - file is too badly damaged");  
464 - }  
465 - m->warnings.push_back(e);  
466 - if (!m->suppress_warnings) {  
467 - *m->log->getWarn() << "WARNING: " << m->warnings.back().what() << "\n";  
468 - }  
469 -}  
470 -  
471 -void  
472 -QPDF::warn(  
473 - qpdf_error_code_e error_code,  
474 - std::string const& object,  
475 - qpdf_offset_t offset,  
476 - std::string const& message)  
477 -{  
478 - warn(QPDFExc(error_code, getFilename(), object, offset, message));  
479 -}  
480 -  
481 -void  
482 QPDF::Xref_table::initialize_empty() 101 QPDF::Xref_table::initialize_empty()
483 { 102 {
484 initialized_ = true; 103 initialized_ = true;
@@ -1384,7 +1003,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) @@ -1384,7 +1003,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
1384 if (!old_type && entry.gen() > 0) { 1003 if (!old_type && entry.gen() > 0) {
1385 // At the moment we are processing the updates last to first and therefore the gen doesn't 1004 // At the moment we are processing the updates last to first and therefore the gen doesn't
1386 // matter as long as it > 0 to distinguish it from an uninitialized entry. This will need 1005 // matter as long as it > 0 to distinguish it from an uninitialized entry. This will need
1387 - // to be revisited when we want to support incremental updates or more comprhensive 1006 + // to be revisited when we want to support incremental updates or more comprehensive
1388 // checking. 1007 // checking.
1389 QTC::TC("qpdf", "QPDF xref deleted object"); 1008 QTC::TC("qpdf", "QPDF xref deleted object");
1390 return; 1009 return;
@@ -1425,7 +1044,7 @@ QPDF::Xref_table::insert_free(QPDFObjGen og) @@ -1425,7 +1044,7 @@ QPDF::Xref_table::insert_free(QPDFObjGen og)
1425 { 1044 {
1426 // At the moment we are processing the updates last to first and therefore the gen doesn't 1045 // At the moment we are processing the updates last to first and therefore the gen doesn't
1427 // matter as long as it > 0 to distinguish it from an uninitialized entry. This will need to be 1046 // matter as long as it > 0 to distinguish it from an uninitialized entry. This will need to be
1428 - // revisited when we want to support incremental updates or more comprhensive checking. 1047 + // revisited when we want to support incremental updates or more comprehensive checking.
1429 if (og.getObj() < 1) { 1048 if (og.getObj() < 1) {
1430 return; 1049 return;
1431 } 1050 }
@@ -1480,12 +1099,6 @@ QPDF::Xref_table::as_map() const @@ -1480,12 +1099,6 @@ QPDF::Xref_table::as_map() const
1480 } 1099 }
1481 1100
1482 void 1101 void
1483 -QPDF::showXRefTable()  
1484 -{  
1485 - m->xref_table.show();  
1486 -}  
1487 -  
1488 -void  
1489 QPDF::Xref_table::show() 1102 QPDF::Xref_table::show()
1490 { 1103 {
1491 auto& cout = *qpdf.m->log->getInfo(); 1104 auto& cout = *qpdf.m->log->getInfo();
@@ -1533,35 +1146,6 @@ QPDF::Xref_table::resolve() @@ -1533,35 +1146,6 @@ QPDF::Xref_table::resolve()
1533 return true; 1146 return true;
1534 } 1147 }
1535 1148
1536 -// Ensure all objects in the pdf file, including those in indirect references, appear in the object  
1537 -// cache.  
1538 -void  
1539 -QPDF::fixDanglingReferences(bool force)  
1540 -{  
1541 - if (m->fixed_dangling_refs) {  
1542 - return;  
1543 - }  
1544 - if (!m->xref_table.resolve()) {  
1545 - QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction");  
1546 - m->xref_table.resolve();  
1547 - }  
1548 - m->fixed_dangling_refs = true;  
1549 -}  
1550 -  
1551 -size_t  
1552 -QPDF::getObjectCount()  
1553 -{  
1554 - // This method returns the next available indirect object number. makeIndirectObject uses it for  
1555 - // this purpose. After fixDanglingReferences is called, all objects in the xref table will also  
1556 - // be in obj_cache.  
1557 - fixDanglingReferences();  
1558 - QPDFObjGen og;  
1559 - if (!m->obj_cache.empty()) {  
1560 - og = (*(m->obj_cache.rbegin())).first;  
1561 - }  
1562 - return toS(og.getObj());  
1563 -}  
1564 -  
1565 std::vector<QPDFObjectHandle> 1149 std::vector<QPDFObjectHandle>
1566 QPDF::getAllObjects() 1150 QPDF::getAllObjects()
1567 { 1151 {
@@ -1574,21 +1158,6 @@ QPDF::getAllObjects() @@ -1574,21 +1158,6 @@ QPDF::getAllObjects()
1574 return result; 1158 return result;
1575 } 1159 }
1576 1160
1577 -void  
1578 -QPDF::setLastObjectDescription(std::string const& description, QPDFObjGen const& og)  
1579 -{  
1580 - m->last_object_description.clear();  
1581 - if (!description.empty()) {  
1582 - m->last_object_description += description;  
1583 - if (og.isIndirect()) {  
1584 - m->last_object_description += ": ";  
1585 - }  
1586 - }  
1587 - if (og.isIndirect()) {  
1588 - m->last_object_description += "object " + og.unparse(' ');  
1589 - }  
1590 -}  
1591 -  
1592 QPDFObjectHandle 1161 QPDFObjectHandle
1593 QPDF::Xref_table::read_trailer() 1162 QPDF::Xref_table::read_trailer()
1594 { 1163 {
@@ -1783,7 +1352,7 @@ QPDF::recoverStreamLength( @@ -1783,7 +1352,7 @@ QPDF::recoverStreamLength(
1783 if (found == QPDFObjGen() || found == og) { 1352 if (found == QPDFObjGen() || found == og) {
1784 // If we are trying to recover an XRef stream the xref table will not contain and 1353 // If we are trying to recover an XRef stream the xref table will not contain and
1785 // won't contain any entries, therefore we cannot check the found length. Otherwise we 1354 // won't contain any entries, therefore we cannot check the found length. Otherwise we
1786 - // found endstream\nendobj within the space allowed for this object, so we're probably 1355 + // found endstream\nendobj within the space allowed for this object, so we're probably
1787 // in good shape. 1356 // in good shape.
1788 } else { 1357 } else {
1789 QTC::TC("qpdf", "QPDF found wrong endstream in recovery"); 1358 QTC::TC("qpdf", "QPDF found wrong endstream in recovery");
@@ -1803,12 +1372,6 @@ QPDF::recoverStreamLength( @@ -1803,12 +1372,6 @@ QPDF::recoverStreamLength(
1803 return length; 1372 return length;
1804 } 1373 }
1805 1374
1806 -QPDFTokenizer::Token  
1807 -QPDF::readToken(InputSource& input, size_t max_len)  
1808 -{  
1809 - return m->tokenizer.readToken(input, m->last_object_description, true, max_len);  
1810 -}  
1811 -  
1812 QPDFObjectHandle 1375 QPDFObjectHandle
1813 QPDF::readObjectAtOffset( 1376 QPDF::readObjectAtOffset(
1814 bool try_recovery, 1377 bool try_recovery,
@@ -2122,13 +1685,6 @@ QPDF::resolveObjectsInStream(int obj_stream_number) @@ -2122,13 +1685,6 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
2122 } 1685 }
2123 } 1686 }
2124 1687
2125 -QPDFObjectHandle  
2126 -QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& obj)  
2127 -{  
2128 - obj->setDefaultDescription(this, og);  
2129 - return {obj};  
2130 -}  
2131 -  
2132 void 1688 void
2133 QPDF::updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object) 1689 QPDF::updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object)
2134 { 1690 {
@@ -2171,50 +1727,6 @@ QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj) @@ -2171,50 +1727,6 @@ QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj)
2171 return newIndirect(next, m->obj_cache[next].object); 1727 return newIndirect(next, m->obj_cache[next].object);
2172 } 1728 }
2173 1729
2174 -QPDFObjectHandle  
2175 -QPDF::makeIndirectObject(QPDFObjectHandle oh)  
2176 -{  
2177 - if (!oh) {  
2178 - throw std::logic_error("attempted to make an uninitialized QPDFObjectHandle indirect");  
2179 - }  
2180 - return makeIndirectFromQPDFObject(oh.getObj());  
2181 -}  
2182 -  
2183 -QPDFObjectHandle  
2184 -QPDF::newReserved()  
2185 -{  
2186 - return makeIndirectFromQPDFObject(QPDF_Reserved::create());  
2187 -}  
2188 -  
2189 -QPDFObjectHandle  
2190 -QPDF::newIndirectNull()  
2191 -{  
2192 - return makeIndirectFromQPDFObject(QPDF_Null::create());  
2193 -}  
2194 -  
2195 -QPDFObjectHandle  
2196 -QPDF::newStream()  
2197 -{  
2198 - return makeIndirectFromQPDFObject(  
2199 - QPDF_Stream::create(this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0));  
2200 -}  
2201 -  
2202 -QPDFObjectHandle  
2203 -QPDF::newStream(std::shared_ptr<Buffer> data)  
2204 -{  
2205 - auto result = newStream();  
2206 - result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());  
2207 - return result;  
2208 -}  
2209 -  
2210 -QPDFObjectHandle  
2211 -QPDF::newStream(std::string const& data)  
2212 -{  
2213 - auto result = newStream();  
2214 - result.replaceStreamData(data, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());  
2215 - return result;  
2216 -}  
2217 -  
2218 std::shared_ptr<QPDFObject> 1730 std::shared_ptr<QPDFObject>
2219 QPDF::getObjectForParser(int id, int gen, bool parse_pdf) 1731 QPDF::getObjectForParser(int id, int gen, bool parse_pdf)
2220 { 1732 {
@@ -2246,43 +1758,6 @@ QPDF::getObjectForJSON(int id, int gen) @@ -2246,43 +1758,6 @@ QPDF::getObjectForJSON(int id, int gen)
2246 return obj; 1758 return obj;
2247 } 1759 }
2248 1760
2249 -QPDFObjectHandle  
2250 -QPDF::getObject(QPDFObjGen const& og)  
2251 -{  
2252 - if (auto it = m->obj_cache.find(og); it != m->obj_cache.end()) {  
2253 - return {it->second.object};  
2254 - } else if (m->xref_table.initialized() && !m->xref_table.type(og)) {  
2255 - return QPDF_Null::create();  
2256 - } else {  
2257 - auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og));  
2258 - return {result.first->second.object};  
2259 - }  
2260 -}  
2261 -  
2262 -QPDFObjectHandle  
2263 -QPDF::getObject(int objid, int generation)  
2264 -{  
2265 - return getObject(QPDFObjGen(objid, generation));  
2266 -}  
2267 -  
2268 -QPDFObjectHandle  
2269 -QPDF::getObjectByObjGen(QPDFObjGen const& og)  
2270 -{  
2271 - return getObject(og);  
2272 -}  
2273 -  
2274 -QPDFObjectHandle  
2275 -QPDF::getObjectByID(int objid, int generation)  
2276 -{  
2277 - return getObject(QPDFObjGen(objid, generation));  
2278 -}  
2279 -  
2280 -void  
2281 -QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh)  
2282 -{  
2283 - replaceObject(QPDFObjGen(objid, generation), oh);  
2284 -}  
2285 -  
2286 void 1761 void
2287 QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh) 1762 QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
2288 { 1763 {
@@ -2305,283 +1780,6 @@ QPDF::removeObject(QPDFObjGen og) @@ -2305,283 +1780,6 @@ QPDF::removeObject(QPDFObjGen og)
2305 } 1780 }
2306 1781
2307 void 1782 void
2308 -QPDF::replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement)  
2309 -{  
2310 - QTC::TC("qpdf", "QPDF replaceReserved");  
2311 - auto tc = reserved.getTypeCode();  
2312 - if (!(tc == ::ot_reserved || tc == ::ot_null)) {  
2313 - throw std::logic_error("replaceReserved called with non-reserved object");  
2314 - }  
2315 - replaceObject(reserved.getObjGen(), replacement);  
2316 -}  
2317 -  
2318 -QPDFObjectHandle  
2319 -QPDF::copyForeignObject(QPDFObjectHandle foreign)  
2320 -{  
2321 - // Here's an explanation of what's going on here.  
2322 - //  
2323 - // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and  
2324 - // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a  
2325 - // foreign QPDF into the local QPDF, we have to replace all indirect object references with  
2326 - // references to the corresponding object in the local file.  
2327 - //  
2328 - // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign  
2329 - // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a  
2330 - // mapping from the foreign ObjGen to the local QPDFObjectHandle.  
2331 - //  
2332 - // To copy, we do a deep traversal of the foreign object with loop detection to discover all  
2333 - // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an  
2334 - // indirect object, we check to see if we have already created a local copy of it. If not, we  
2335 - // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the  
2336 - // mapping from the foreign object ID to the new object. While we  
2337 - // do this, we keep a list of objects to copy.  
2338 - //  
2339 - // Once we are done with the traversal, we copy all the objects that we need to copy. However,  
2340 - // the copies will contain indirect object IDs that refer to objects in the foreign file. We  
2341 - // need to replace them with references to objects in the local file. This is what  
2342 - // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with  
2343 - // all the indirect references replaced with new ones in the local context, we can replace the  
2344 - // local reserved object with the copy. This mechanism allows us to copy objects with circular  
2345 - // references in any order.  
2346 -  
2347 - // For streams, rather than copying the objects, we set up the stream data to pull from the  
2348 - // original stream by using a stream data provider. This is done in a manner that doesn't  
2349 - // require the original QPDF object but may require the original source of the stream data with  
2350 - // special handling for immediate_copy_from. This logic is also in  
2351 - // replaceForeignIndirectObjects.  
2352 -  
2353 - // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented  
2354 - // use case to copy pages this way if the intention is to not update the pages tree.  
2355 - if (!foreign.isIndirect()) {  
2356 - QTC::TC("qpdf", "QPDF copyForeign direct");  
2357 - throw std::logic_error("QPDF::copyForeign called with direct object handle");  
2358 - }  
2359 - QPDF& other = foreign.getQPDF();  
2360 - if (&other == this) {  
2361 - QTC::TC("qpdf", "QPDF copyForeign not foreign");  
2362 - throw std::logic_error("QPDF::copyForeign called with object from this QPDF");  
2363 - }  
2364 -  
2365 - ObjCopier& obj_copier = m->object_copiers[other.m->unique_id];  
2366 - if (!obj_copier.visiting.empty()) {  
2367 - throw std::logic_error("obj_copier.visiting is not empty"  
2368 - " at the beginning of copyForeignObject");  
2369 - }  
2370 -  
2371 - // Make sure we have an object in this file for every referenced object in the old file.  
2372 - // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we  
2373 - // have to copy, the local object will be a reservation, unless it is a stream, in which case  
2374 - // the local object will already be a stream.  
2375 - reserveObjects(foreign, obj_copier, true);  
2376 -  
2377 - if (!obj_copier.visiting.empty()) {  
2378 - throw std::logic_error("obj_copier.visiting is not empty after reserving objects");  
2379 - }  
2380 -  
2381 - // Copy any new objects and replace the reservations.  
2382 - for (auto& to_copy: obj_copier.to_copy) {  
2383 - QPDFObjectHandle copy = replaceForeignIndirectObjects(to_copy, obj_copier, true);  
2384 - if (!to_copy.isStream()) {  
2385 - QPDFObjGen og(to_copy.getObjGen());  
2386 - replaceReserved(obj_copier.object_map[og], copy);  
2387 - }  
2388 - }  
2389 - obj_copier.to_copy.clear();  
2390 -  
2391 - auto og = foreign.getObjGen();  
2392 - if (!obj_copier.object_map.count(og)) {  
2393 - warn(damagedPDF("unexpected reference to /Pages object while copying foreign object; "  
2394 - "replacing with null"));  
2395 - return QPDFObjectHandle::newNull();  
2396 - }  
2397 - return obj_copier.object_map[foreign.getObjGen()];  
2398 -}  
2399 -  
2400 -void  
2401 -QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)  
2402 -{  
2403 - auto foreign_tc = foreign.getTypeCode();  
2404 - if (foreign_tc == ::ot_reserved) {  
2405 - throw std::logic_error("QPDF: attempting to copy a foreign reserved object");  
2406 - }  
2407 -  
2408 - if (foreign.isPagesObject()) {  
2409 - QTC::TC("qpdf", "QPDF not copying pages object");  
2410 - return;  
2411 - }  
2412 -  
2413 - if (foreign.isIndirect()) {  
2414 - QPDFObjGen foreign_og(foreign.getObjGen());  
2415 - if (!obj_copier.visiting.add(foreign_og)) {  
2416 - QTC::TC("qpdf", "QPDF loop reserving objects");  
2417 - return;  
2418 - }  
2419 - if (obj_copier.object_map.count(foreign_og) > 0) {  
2420 - QTC::TC("qpdf", "QPDF already reserved object");  
2421 - if (!(top && foreign.isPageObject() && obj_copier.object_map[foreign_og].isNull())) {  
2422 - obj_copier.visiting.erase(foreign);  
2423 - return;  
2424 - }  
2425 - } else {  
2426 - QTC::TC("qpdf", "QPDF copy indirect");  
2427 - obj_copier.object_map[foreign_og] =  
2428 - foreign.isStream() ? newStream() : newIndirectNull();  
2429 - if ((!top) && foreign.isPageObject()) {  
2430 - QTC::TC("qpdf", "QPDF not crossing page boundary");  
2431 - obj_copier.visiting.erase(foreign_og);  
2432 - return;  
2433 - }  
2434 - }  
2435 - obj_copier.to_copy.push_back(foreign);  
2436 - }  
2437 -  
2438 - if (foreign_tc == ::ot_array) {  
2439 - QTC::TC("qpdf", "QPDF reserve array");  
2440 - int n = foreign.getArrayNItems();  
2441 - for (int i = 0; i < n; ++i) {  
2442 - reserveObjects(foreign.getArrayItem(i), obj_copier, false);  
2443 - }  
2444 - } else if (foreign_tc == ::ot_dictionary) {  
2445 - QTC::TC("qpdf", "QPDF reserve dictionary");  
2446 - for (auto const& key: foreign.getKeys()) {  
2447 - reserveObjects(foreign.getKey(key), obj_copier, false);  
2448 - }  
2449 - } else if (foreign_tc == ::ot_stream) {  
2450 - QTC::TC("qpdf", "QPDF reserve stream");  
2451 - reserveObjects(foreign.getDict(), obj_copier, false);  
2452 - }  
2453 -  
2454 - obj_copier.visiting.erase(foreign);  
2455 -}  
2456 -  
2457 -QPDFObjectHandle  
2458 -QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top)  
2459 -{  
2460 - auto foreign_tc = foreign.getTypeCode();  
2461 - QPDFObjectHandle result;  
2462 - if ((!top) && foreign.isIndirect()) {  
2463 - QTC::TC("qpdf", "QPDF replace indirect");  
2464 - auto mapping = obj_copier.object_map.find(foreign.getObjGen());  
2465 - if (mapping == obj_copier.object_map.end()) {  
2466 - // This case would occur if this is a reference to a Pages object that we didn't  
2467 - // traverse into.  
2468 - QTC::TC("qpdf", "QPDF replace foreign indirect with null");  
2469 - result = QPDFObjectHandle::newNull();  
2470 - } else {  
2471 - result = mapping->second;  
2472 - }  
2473 - } else if (foreign_tc == ::ot_array) {  
2474 - QTC::TC("qpdf", "QPDF replace array");  
2475 - result = QPDFObjectHandle::newArray();  
2476 - int n = foreign.getArrayNItems();  
2477 - for (int i = 0; i < n; ++i) {  
2478 - result.appendItem(  
2479 - // line-break  
2480 - replaceForeignIndirectObjects(foreign.getArrayItem(i), obj_copier, false));  
2481 - }  
2482 - } else if (foreign_tc == ::ot_dictionary) {  
2483 - QTC::TC("qpdf", "QPDF replace dictionary");  
2484 - result = QPDFObjectHandle::newDictionary();  
2485 - std::set<std::string> keys = foreign.getKeys();  
2486 - for (auto const& iter: keys) {  
2487 - result.replaceKey(  
2488 - iter, replaceForeignIndirectObjects(foreign.getKey(iter), obj_copier, false));  
2489 - }  
2490 - } else if (foreign_tc == ::ot_stream) {  
2491 - QTC::TC("qpdf", "QPDF replace stream");  
2492 - result = obj_copier.object_map[foreign.getObjGen()];  
2493 - result.assertStream();  
2494 - QPDFObjectHandle dict = result.getDict();  
2495 - QPDFObjectHandle old_dict = foreign.getDict();  
2496 - std::set<std::string> keys = old_dict.getKeys();  
2497 - for (auto const& iter: keys) {  
2498 - dict.replaceKey(  
2499 - iter, replaceForeignIndirectObjects(old_dict.getKey(iter), obj_copier, false));  
2500 - }  
2501 - copyStreamData(result, foreign);  
2502 - } else {  
2503 - foreign.assertScalar();  
2504 - result = foreign;  
2505 - result.makeDirect();  
2506 - }  
2507 -  
2508 - if (top && (!result.isStream()) && result.isIndirect()) {  
2509 - throw std::logic_error("replacement for foreign object is indirect");  
2510 - }  
2511 -  
2512 - return result;  
2513 -}  
2514 -  
2515 -void  
2516 -QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)  
2517 -{  
2518 - // This method was originally written for copying foreign streams, but it is used by  
2519 - // QPDFObjectHandle to copy streams from the same QPDF object as well.  
2520 -  
2521 - QPDFObjectHandle dict = result.getDict();  
2522 - QPDFObjectHandle old_dict = foreign.getDict();  
2523 - if (m->copied_stream_data_provider == nullptr) {  
2524 - m->copied_stream_data_provider = new CopiedStreamDataProvider(*this);  
2525 - m->copied_streams =  
2526 - std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);  
2527 - }  
2528 - QPDFObjGen local_og(result.getObjGen());  
2529 - // Copy information from the foreign stream so we can pipe its data later without keeping the  
2530 - // original QPDF object around.  
2531 -  
2532 - QPDF& foreign_stream_qpdf =  
2533 - foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");  
2534 -  
2535 - auto stream = foreign.getObjectPtr()->as<QPDF_Stream>();  
2536 - if (stream == nullptr) {  
2537 - throw std::logic_error("unable to retrieve underlying"  
2538 - " stream object from foreign stream");  
2539 - }  
2540 - std::shared_ptr<Buffer> stream_buffer = stream->getStreamDataBuffer();  
2541 - if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {  
2542 - // Pull the stream data into a buffer before attempting the copy operation. Do it on the  
2543 - // source stream so that if the source stream is copied multiple times, we don't have to  
2544 - // keep duplicating the memory.  
2545 - QTC::TC("qpdf", "QPDF immediate copy stream data");  
2546 - foreign.replaceStreamData(  
2547 - foreign.getRawStreamData(),  
2548 - old_dict.getKey("/Filter"),  
2549 - old_dict.getKey("/DecodeParms"));  
2550 - stream_buffer = stream->getStreamDataBuffer();  
2551 - }  
2552 - std::shared_ptr<QPDFObjectHandle::StreamDataProvider> stream_provider =  
2553 - stream->getStreamDataProvider();  
2554 - if (stream_buffer.get()) {  
2555 - QTC::TC("qpdf", "QPDF copy foreign stream with buffer");  
2556 - result.replaceStreamData(  
2557 - stream_buffer, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));  
2558 - } else if (stream_provider.get()) {  
2559 - // In this case, the remote stream's QPDF must stay in scope.  
2560 - QTC::TC("qpdf", "QPDF copy foreign stream with provider");  
2561 - m->copied_stream_data_provider->registerForeignStream(local_og, foreign);  
2562 - result.replaceStreamData(  
2563 - m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));  
2564 - } else {  
2565 - auto foreign_stream_data = std::make_shared<ForeignStreamData>(  
2566 - foreign_stream_qpdf.m->encp,  
2567 - foreign_stream_qpdf.m->file_sp,  
2568 - foreign.getObjGen(),  
2569 - stream->getParsedOffset(),  
2570 - stream->getLength(),  
2571 - dict);  
2572 - m->copied_stream_data_provider->registerForeignStream(local_og, foreign_stream_data);  
2573 - result.replaceStreamData(  
2574 - m->copied_streams, dict.getKey("/Filter"), dict.getKey("/DecodeParms"));  
2575 - }  
2576 -}  
2577 -  
2578 -void  
2579 -QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)  
2580 -{  
2581 - swapObjects(QPDFObjGen(objid1, generation1), QPDFObjGen(objid2, generation2));  
2582 -}  
2583 -  
2584 -void  
2585 QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2) 1783 QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)
2586 { 1784 {
2587 // Force objects to be read from the input source if needed, then swap them in the cache. 1785 // Force objects to be read from the input source if needed, then swap them in the cache.
@@ -2590,92 +1788,6 @@ QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2) @@ -2590,92 +1788,6 @@ QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)
2590 m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object); 1788 m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object);
2591 } 1789 }
2592 1790
2593 -unsigned long long  
2594 -QPDF::getUniqueId() const  
2595 -{  
2596 - return m->unique_id;  
2597 -}  
2598 -  
2599 -std::string  
2600 -QPDF::getFilename() const  
2601 -{  
2602 - return m->file->getName();  
2603 -}  
2604 -  
2605 -PDFVersion  
2606 -QPDF::getVersionAsPDFVersion()  
2607 -{  
2608 - int major = 1;  
2609 - int minor = 3;  
2610 - int extension_level = getExtensionLevel();  
2611 -  
2612 - std::regex v("^[[:space:]]*([0-9]+)\\.([0-9]+)");  
2613 - std::smatch match;  
2614 - if (std::regex_search(m->pdf_version, match, v)) {  
2615 - major = QUtil::string_to_int(match[1].str().c_str());  
2616 - minor = QUtil::string_to_int(match[2].str().c_str());  
2617 - }  
2618 -  
2619 - return {major, minor, extension_level};  
2620 -}  
2621 -  
2622 -std::string  
2623 -QPDF::getPDFVersion() const  
2624 -{  
2625 - return m->pdf_version;  
2626 -}  
2627 -  
2628 -int  
2629 -QPDF::getExtensionLevel()  
2630 -{  
2631 - int result = 0;  
2632 - QPDFObjectHandle obj = getRoot();  
2633 - if (obj.hasKey("/Extensions")) {  
2634 - obj = obj.getKey("/Extensions");  
2635 - if (obj.isDictionary() && obj.hasKey("/ADBE")) {  
2636 - obj = obj.getKey("/ADBE");  
2637 - if (obj.isDictionary() && obj.hasKey("/ExtensionLevel")) {  
2638 - obj = obj.getKey("/ExtensionLevel");  
2639 - if (obj.isInteger()) {  
2640 - result = obj.getIntValueAsInt();  
2641 - }  
2642 - }  
2643 - }  
2644 - }  
2645 - return result;  
2646 -}  
2647 -  
2648 -QPDFObjectHandle  
2649 -QPDF::getTrailer()  
2650 -{  
2651 - return m->xref_table.trailer();  
2652 -}  
2653 -  
2654 -QPDFObjectHandle  
2655 -QPDF::getRoot()  
2656 -{  
2657 - QPDFObjectHandle root = m->xref_table.trailer().getKey("/Root");  
2658 - if (!root.isDictionary()) {  
2659 - throw damagedPDF("", 0, "unable to find /Root dictionary");  
2660 - } else if (  
2661 - // Check_mode is an interim solution to request #810 pending a more comprehensive review of  
2662 - // the approach to more extensive checks and warning levels.  
2663 - m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {  
2664 - warn(damagedPDF("", 0, "catalog /Type entry missing or invalid"));  
2665 - root.replaceKey("/Type", "/Catalog"_qpdf);  
2666 - }  
2667 - return root;  
2668 -}  
2669 -  
2670 -std::map<QPDFObjGen, QPDFXRefEntry>  
2671 -QPDF::getXRefTable()  
2672 -{  
2673 - if (!m->xref_table.initialized()) {  
2674 - throw std::logic_error("QPDF::getXRefTable called before parsing.");  
2675 - }  
2676 - return m->xref_table.as_map();  
2677 -}  
2678 -  
2679 size_t 1791 size_t
2680 QPDF::tableSize() 1792 QPDF::tableSize()
2681 { 1793 {
@@ -2806,192 +1918,3 @@ QPDF::getCompressibleObjGens() @@ -2806,192 +1918,3 @@ QPDF::getCompressibleObjGens()
2806 1918
2807 return result; 1919 return result;
2808 } 1920 }
2809 -  
2810 -bool  
2811 -QPDF::pipeStreamData(  
2812 - std::shared_ptr<EncryptionParameters> encp,  
2813 - std::shared_ptr<InputSource> file,  
2814 - QPDF& qpdf_for_warning,  
2815 - QPDFObjGen const& og,  
2816 - qpdf_offset_t offset,  
2817 - size_t length,  
2818 - QPDFObjectHandle stream_dict,  
2819 - Pipeline* pipeline,  
2820 - bool suppress_warnings,  
2821 - bool will_retry)  
2822 -{  
2823 - std::unique_ptr<Pipeline> to_delete;  
2824 - if (encp->encrypted) {  
2825 - decryptStream(encp, file, qpdf_for_warning, pipeline, og, stream_dict, to_delete);  
2826 - }  
2827 -  
2828 - bool attempted_finish = false;  
2829 - try {  
2830 - file->seek(offset, SEEK_SET);  
2831 - auto buf = std::make_unique<char[]>(length);  
2832 - if (auto read = file->read(buf.get(), length); read != length) {  
2833 - throw damagedPDF(*file, "", offset + toO(read), "unexpected EOF reading stream data");  
2834 - }  
2835 - pipeline->write(buf.get(), length);  
2836 - attempted_finish = true;  
2837 - pipeline->finish();  
2838 - return true;  
2839 - } catch (QPDFExc& e) {  
2840 - if (!suppress_warnings) {  
2841 - qpdf_for_warning.warn(e);  
2842 - }  
2843 - } catch (std::exception& e) {  
2844 - if (!suppress_warnings) {  
2845 - QTC::TC("qpdf", "QPDF decoding error warning");  
2846 - qpdf_for_warning.warn(  
2847 - // line-break  
2848 - damagedPDF(  
2849 - *file,  
2850 - "",  
2851 - file->getLastOffset(),  
2852 - ("error decoding stream data for object " + og.unparse(' ') + ": " +  
2853 - e.what())));  
2854 - if (will_retry) {  
2855 - qpdf_for_warning.warn(  
2856 - // line-break  
2857 - damagedPDF(  
2858 - *file,  
2859 - "",  
2860 - file->getLastOffset(),  
2861 - "stream will be re-processed without filtering to avoid data loss"));  
2862 - }  
2863 - }  
2864 - }  
2865 - if (!attempted_finish) {  
2866 - try {  
2867 - pipeline->finish();  
2868 - } catch (std::exception&) {  
2869 - // ignore  
2870 - }  
2871 - }  
2872 - return false;  
2873 -}  
2874 -  
2875 -bool  
2876 -QPDF::pipeStreamData(  
2877 - QPDFObjGen const& og,  
2878 - qpdf_offset_t offset,  
2879 - size_t length,  
2880 - QPDFObjectHandle stream_dict,  
2881 - Pipeline* pipeline,  
2882 - bool suppress_warnings,  
2883 - bool will_retry)  
2884 -{  
2885 - return pipeStreamData(  
2886 - m->encp,  
2887 - m->file_sp,  
2888 - *this,  
2889 - og,  
2890 - offset,  
2891 - length,  
2892 - stream_dict,  
2893 - pipeline,  
2894 - suppress_warnings,  
2895 - will_retry);  
2896 -}  
2897 -  
2898 -bool  
2899 -QPDF::pipeForeignStreamData(  
2900 - std::shared_ptr<ForeignStreamData> foreign,  
2901 - Pipeline* pipeline,  
2902 - bool suppress_warnings,  
2903 - bool will_retry)  
2904 -{  
2905 - if (foreign->encp->encrypted) {  
2906 - QTC::TC("qpdf", "QPDF pipe foreign encrypted stream");  
2907 - }  
2908 - return pipeStreamData(  
2909 - foreign->encp,  
2910 - foreign->file,  
2911 - *this,  
2912 - foreign->foreign_og,  
2913 - foreign->offset,  
2914 - foreign->length,  
2915 - foreign->local_dict,  
2916 - pipeline,  
2917 - suppress_warnings,  
2918 - will_retry);  
2919 -}  
2920 -  
2921 -// Throw a generic exception when we lack context for something more specific. New code should not  
2922 -// use this. This method exists to improve somewhat from calling assert in very old code.  
2923 -void  
2924 -QPDF::stopOnError(std::string const& message)  
2925 -{  
2926 - throw damagedPDF("", message);  
2927 -}  
2928 -  
2929 -// Return an exception of type qpdf_e_damaged_pdf.  
2930 -QPDFExc  
2931 -QPDF::damagedPDF(  
2932 - InputSource& input, std::string const& object, qpdf_offset_t offset, std::string const& message)  
2933 -{  
2934 - return {qpdf_e_damaged_pdf, input.getName(), object, offset, message};  
2935 -}  
2936 -  
2937 -// Return an exception of type qpdf_e_damaged_pdf. The object is taken from  
2938 -// m->last_object_description.  
2939 -QPDFExc  
2940 -QPDF::damagedPDF(InputSource& input, qpdf_offset_t offset, std::string const& message)  
2941 -{  
2942 - return damagedPDF(input, m->last_object_description, offset, message);  
2943 -}  
2944 -  
2945 -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file.  
2946 -QPDFExc  
2947 -QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)  
2948 -{  
2949 - return {qpdf_e_damaged_pdf, m->file->getName(), object, offset, message};  
2950 -}  
2951 -  
2952 -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the  
2953 -// offset from .m->file->getLastOffset().  
2954 -QPDFExc  
2955 -QPDF::damagedPDF(std::string const& object, std::string const& message)  
2956 -{  
2957 - return damagedPDF(object, m->file->getLastOffset(), message);  
2958 -}  
2959 -  
2960 -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object  
2961 -// from .m->last_object_description.  
2962 -QPDFExc  
2963 -QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)  
2964 -{  
2965 - return damagedPDF(m->last_object_description, offset, message);  
2966 -}  
2967 -  
2968 -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object  
2969 -// from m->last_object_description and the offset from m->file->getLastOffset().  
2970 -QPDFExc  
2971 -QPDF::damagedPDF(std::string const& message)  
2972 -{  
2973 - return damagedPDF(m->last_object_description, m->file->getLastOffset(), message);  
2974 -}  
2975 -  
2976 -bool  
2977 -QPDF::everCalledGetAllPages() const  
2978 -{  
2979 - return m->ever_called_get_all_pages;  
2980 -}  
2981 -  
2982 -bool  
2983 -QPDF::everPushedInheritedAttributesToPages() const  
2984 -{  
2985 - return m->ever_pushed_inherited_attributes_to_pages;  
2986 -}  
2987 -  
2988 -void  
2989 -QPDF::removeSecurityRestrictions()  
2990 -{  
2991 - auto root = getRoot();  
2992 - root.removeKey("/Perms");  
2993 - auto acroform = root.getKey("/AcroForm");  
2994 - if (acroform.isDictionary() && acroform.hasKey("/SigFlags")) {  
2995 - acroform.replaceKey("/SigFlags", QPDFObjectHandle::newInteger(0));  
2996 - }  
2997 -}