Commit 92fbc6fdf56bad6aff8c9f3f1a3032d5ad36ec51

Authored by Jay Berkenbilt
1 parent 60afe414

QPDFObjectHandle::copyStream

ChangeLog
1 2021-02-21 Jay Berkenbilt <ejb@ql.org> 1 2021-02-21 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Add QPDFObjectHandle::copyStream() for making a copy of a stream
  4 + within the same QPDF instance.
  5 +
3 * Allow QPDFObjectHandle::newArray and 6 * Allow QPDFObjectHandle::newArray and
4 QPDFObjectHandle::newFromMatrix take QPDFMatrix as well as 7 QPDFObjectHandle::newFromMatrix take QPDFMatrix as well as
5 QPDFObjectHandle::Matrix 8 QPDFObjectHandle::Matrix
include/qpdf/QPDF.hh
@@ -700,6 +700,21 @@ class QPDF @@ -700,6 +700,21 @@ class QPDF
700 }; 700 };
701 friend class Resolver; 701 friend class Resolver;
702 702
  703 + // StreamCopier class is restricted to QPDFObjectHandle so it can
  704 + // copy stream data.
  705 + class StreamCopier
  706 + {
  707 + friend class QPDFObjectHandle;
  708 + private:
  709 + static void copyStreamData(QPDF* qpdf,
  710 + QPDFObjectHandle const& dest,
  711 + QPDFObjectHandle const& src)
  712 + {
  713 + qpdf->copyStreamData(dest, src);
  714 + }
  715 + };
  716 + friend class Resolver;
  717 +
703 // ParseGuard class allows QPDFObjectHandle to detect re-entrant 718 // ParseGuard class allows QPDFObjectHandle to detect re-entrant
704 // resolution 719 // resolution
705 class ParseGuard 720 class ParseGuard
include/qpdf/QPDFObjectHandle.hh
@@ -761,7 +761,8 @@ class QPDFObjectHandle @@ -761,7 +761,8 @@ class QPDFObjectHandle
761 // the same place. In the strictest sense, this is not a shallow 761 // the same place. In the strictest sense, this is not a shallow
762 // copy because it recursively descends arrays and dictionaries; 762 // copy because it recursively descends arrays and dictionaries;
763 // it just doesn't cross over indirect objects. See also 763 // it just doesn't cross over indirect objects. See also
764 - // unsafeShallowCopy(). 764 + // unsafeShallowCopy(). You can't copy a stream this way. See
  765 + // copyStream() instead.
765 QPDF_DLL 766 QPDF_DLL
766 QPDFObjectHandle shallowCopy(); 767 QPDFObjectHandle shallowCopy();
767 768
@@ -776,6 +777,19 @@ class QPDFObjectHandle @@ -776,6 +777,19 @@ class QPDFObjectHandle
776 QPDF_DLL 777 QPDF_DLL
777 QPDFObjectHandle unsafeShallowCopy(); 778 QPDFObjectHandle unsafeShallowCopy();
778 779
  780 + // Create a copy of this stream. The new stream and the old stream
  781 + // are independent: after the copy, either the original or the
  782 + // copy's dictionary or data can be modified without affecting the
  783 + // other. This uses StreamDataProvider internally, so no
  784 + // unnecessary copies of the stream's data are made. If the source
  785 + // stream's data is already being provided by a
  786 + // StreamDataProvider, the new stream will use the same one, so
  787 + // you have to make sure your StreamDataProvider can handle that
  788 + // case. But if you're already using a StreamDataProvider, you
  789 + // probably don't need to call this method.
  790 + QPDF_DLL
  791 + QPDFObjectHandle copyStream();
  792 +
779 // Mutator methods. Use with caution. 793 // Mutator methods. Use with caution.
780 794
781 // Recursively copy this object, making it direct. An exception is 795 // Recursively copy this object, making it direct. An exception is
libqpdf/QPDF.cc
@@ -2596,6 +2596,10 @@ QPDF::replaceForeignIndirectObjects( @@ -2596,6 +2596,10 @@ QPDF::replaceForeignIndirectObjects(
2596 void 2596 void
2597 QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) 2597 QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
2598 { 2598 {
  2599 + // This method was originally written for copying foreign streams,
  2600 + // but it is used by QPDFObjectHandle to copy streams from the
  2601 + // same QPDF object as well.
  2602 +
2599 QPDFObjectHandle dict = result.getDict(); 2603 QPDFObjectHandle dict = result.getDict();
2600 QPDFObjectHandle old_dict = foreign.getDict(); 2604 QPDFObjectHandle old_dict = foreign.getDict();
2601 if (this->m->copied_stream_data_provider == 0) 2605 if (this->m->copied_stream_data_provider == 0)
libqpdf/QPDFObjectHandle.cc
@@ -2877,6 +2877,28 @@ QPDFObjectHandle::copyObject(std::set<QPDFObjGen>& visited, @@ -2877,6 +2877,28 @@ QPDFObjectHandle::copyObject(std::set<QPDFObjGen>& visited,
2877 } 2877 }
2878 } 2878 }
2879 2879
  2880 +QPDFObjectHandle
  2881 +QPDFObjectHandle::copyStream()
  2882 +{
  2883 + assertStream();
  2884 + QPDFObjectHandle result = newStream(this->getOwningQPDF());
  2885 + QPDFObjectHandle dict = result.getDict();
  2886 + QPDFObjectHandle old_dict = getDict();
  2887 + for (auto& iter: QPDFDictItems(old_dict))
  2888 + {
  2889 + if (iter.second.isIndirect())
  2890 + {
  2891 + dict.replaceKey(iter.first, iter.second);
  2892 + }
  2893 + else
  2894 + {
  2895 + dict.replaceKey(iter.first, iter.second.shallowCopy());
  2896 + }
  2897 + }
  2898 + QPDF::StreamCopier::copyStreamData(getOwningQPDF(), result, *this);
  2899 + return result;
  2900 +}
  2901 +
2880 void 2902 void
2881 QPDFObjectHandle::makeDirect() 2903 QPDFObjectHandle::makeDirect()
2882 { 2904 {
manual/qpdf-manual.xml
@@ -5202,6 +5202,13 @@ print "\n"; @@ -5202,6 +5202,13 @@ print "\n";
5202 </listitem> 5202 </listitem>
5203 <listitem> 5203 <listitem>
5204 <para> 5204 <para>
  5205 + Add <function>QPDFObjectHandle::copyStream</function> for
  5206 + making a copy of a stream within the same
  5207 + <classname>QPDF</classname> instance.
  5208 + </para>
  5209 + </listitem>
  5210 + <listitem>
  5211 + <para>
5205 Add <function>QUtil::get_current_qpdf_time</function>, 5212 Add <function>QUtil::get_current_qpdf_time</function>,
5206 <function>QUtil::pdf_time_to_qpdf_time</function>, and 5213 <function>QUtil::pdf_time_to_qpdf_time</function>, and
5207 <function>QUtil::qpdf_time_to_pdf_time</function> for 5214 <function>QUtil::qpdf_time_to_pdf_time</function> for
qpdf/qtest/qpdf.test
@@ -1549,7 +1549,7 @@ unlink "a.pdf" or die; @@ -1549,7 +1549,7 @@ unlink "a.pdf" or die;
1549 show_ntests(); 1549 show_ntests();
1550 # ---------- 1550 # ----------
1551 $td->notify("--- Object copying ---"); 1551 $td->notify("--- Object copying ---");
1552 -$n_tests += 7; 1552 +$n_tests += 9;
1553 1553
1554 $td->runtest("shallow copy an array", 1554 $td->runtest("shallow copy an array",
1555 {$td->COMMAND => "test_driver 20 shallow_array.pdf"}, 1555 {$td->COMMAND => "test_driver 20 shallow_array.pdf"},
@@ -1578,6 +1578,13 @@ $td->runtest("detect foreign object in write", @@ -1578,6 +1578,13 @@ $td->runtest("detect foreign object in write",
1578 " copy-foreign-objects-in.pdf minimal.pdf"}, 1578 " copy-foreign-objects-in.pdf minimal.pdf"},
1579 {$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0}, 1579 {$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0},
1580 $td->NORMALIZE_NEWLINES); 1580 $td->NORMALIZE_NEWLINES);
  1581 +$td->runtest("copy a stream",
  1582 + {$td->COMMAND => "test_driver 79 minimal.pdf"},
  1583 + {$td->STRING => "test 79 done\n", $td->EXIT_STATUS => 0},
  1584 + $td->NORMALIZE_NEWLINES);
  1585 +$td->runtest("check output",
  1586 + {$td->FILE => "a.pdf"},
  1587 + {$td->FILE => "test79.pdf"});
1581 1588
1582 show_ntests(); 1589 show_ntests();
1583 # ---------- 1590 # ----------
qpdf/qtest/qpdf/test79.pdf 0 → 100644
  1 +%PDF-1.3
  2 +%¿÷¢þ
  3 +%QDF-1.0
  4 +
  5 +%% Original object ID: 1 0
  6 +1 0 obj
  7 +<<
  8 + /Pages 14 0 R
  9 + /Type /Catalog
  10 +>>
  11 +endobj
  12 +
  13 +%% Original object ID: 10 0
  14 +2 0 obj
  15 +<<
  16 + /Other (other: 1)
  17 + /Length 3 0 R
  18 +>>
  19 +stream
  20 +BT
  21 + /F1 24 Tf
  22 + 72 720 Td
  23 + (Potato) Tj
  24 +ET
  25 +endstream
  26 +endobj
  27 +
  28 +3 0 obj
  29 +44
  30 +endobj
  31 +
  32 +%% Original object ID: 11 0
  33 +4 0 obj
  34 +<<
  35 + /Other (other: 2)
  36 + /Stuff <<
  37 + /Direct 3
  38 + /Indirect 15 0 R
  39 + >>
  40 + /Length 5 0 R
  41 +>>
  42 +stream
  43 +from string
  44 +endstream
  45 +endobj
  46 +
  47 +%QDF: ignore_newline
  48 +5 0 obj
  49 +11
  50 +endobj
  51 +
  52 +%% Original object ID: 12 0
  53 +6 0 obj
  54 +<<
  55 + /Other (other: 3)
  56 + /Length 7 0 R
  57 +>>
  58 +stream
  59 +from buffer
  60 +endstream
  61 +endobj
  62 +
  63 +%QDF: ignore_newline
  64 +7 0 obj
  65 +11
  66 +endobj
  67 +
  68 +%% Contents for page 1
  69 +%% Original object ID: 4 0
  70 +8 0 obj
  71 +<<
  72 + /Length 9 0 R
  73 +>>
  74 +stream
  75 +something new 1
  76 +endstream
  77 +endobj
  78 +
  79 +%QDF: ignore_newline
  80 +9 0 obj
  81 +15
  82 +endobj
  83 +
  84 +%% Original object ID: 7 0
  85 +10 0 obj
  86 +<<
  87 + /Other (other stuff)
  88 + /Stuff <<
  89 + /Direct 3
  90 + /Indirect 15 0 R
  91 + >>
  92 + /Length 11 0 R
  93 +>>
  94 +stream
  95 +something new 2
  96 +endstream
  97 +endobj
  98 +
  99 +%QDF: ignore_newline
  100 +11 0 obj
  101 +15
  102 +endobj
  103 +
  104 +%% Original object ID: 9 0
  105 +12 0 obj
  106 +<<
  107 + /Length 13 0 R
  108 +>>
  109 +stream
  110 +something new 3
  111 +endstream
  112 +endobj
  113 +
  114 +%QDF: ignore_newline
  115 +13 0 obj
  116 +15
  117 +endobj
  118 +
  119 +%% Original object ID: 2 0
  120 +14 0 obj
  121 +<<
  122 + /Count 1
  123 + /Kids [
  124 + 16 0 R
  125 + ]
  126 + /Type /Pages
  127 +>>
  128 +endobj
  129 +
  130 +%% Original object ID: 8 0
  131 +15 0 obj
  132 +16059
  133 +endobj
  134 +
  135 +%% Page 1
  136 +%% Original object ID: 3 0
  137 +16 0 obj
  138 +<<
  139 + /Contents 8 0 R
  140 + /MediaBox [
  141 + 0
  142 + 0
  143 + 612
  144 + 792
  145 + ]
  146 + /Parent 14 0 R
  147 + /Resources <<
  148 + /Font <<
  149 + /F1 17 0 R
  150 + >>
  151 + /ProcSet 18 0 R
  152 + >>
  153 + /Type /Page
  154 +>>
  155 +endobj
  156 +
  157 +%% Original object ID: 6 0
  158 +17 0 obj
  159 +<<
  160 + /BaseFont /Helvetica
  161 + /Encoding /WinAnsiEncoding
  162 + /Name /F1
  163 + /Subtype /Type1
  164 + /Type /Font
  165 +>>
  166 +endobj
  167 +
  168 +%% Original object ID: 5 0
  169 +18 0 obj
  170 +[
  171 + /PDF
  172 + /Text
  173 +]
  174 +endobj
  175 +
  176 +xref
  177 +0 19
  178 +0000000000 65535 f
  179 +0000000052 00000 n
  180 +0000000135 00000 n
  181 +0000000254 00000 n
  182 +0000000301 00000 n
  183 +0000000461 00000 n
  184 +0000000508 00000 n
  185 +0000000616 00000 n
  186 +0000000685 00000 n
  187 +0000000777 00000 n
  188 +0000000823 00000 n
  189 +0000000992 00000 n
  190 +0000001039 00000 n
  191 +0000001133 00000 n
  192 +0000001180 00000 n
  193 +0000001281 00000 n
  194 +0000001341 00000 n
  195 +0000001564 00000 n
  196 +0000001710 00000 n
  197 +trailer <<
  198 + /Copies [
  199 + 2 0 R
  200 + 4 0 R
  201 + 6 0 R
  202 + ]
  203 + /Originals [
  204 + 8 0 R
  205 + 10 0 R
  206 + 12 0 R
  207 + ]
  208 + /Root 1 0 R
  209 + /Size 19
  210 + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
  211 +>>
  212 +startxref
  213 +1746
  214 +%%EOF
qpdf/test_driver.cc
@@ -2844,6 +2844,67 @@ void runtest(int n, char const* filename1, char const* arg2) @@ -2844,6 +2844,67 @@ void runtest(int n, char const* filename1, char const* arg2)
2844 w.setQDFMode(true); 2844 w.setQDFMode(true);
2845 w.write(); 2845 w.write();
2846 } 2846 }
  2847 + else if (n == 79)
  2848 + {
  2849 + // Exercise QPDFObjectHandle::copyStream()
  2850 +
  2851 + // Copy each stream, then replace the original's data and make
  2852 + // sure the copy's data still matches the original bytes.
  2853 + auto copies = QPDFObjectHandle::newArray();
  2854 + pdf.getTrailer().replaceKey("/Copies", copies);
  2855 + auto null = QPDFObjectHandle::newNull();
  2856 +
  2857 + // s1: the first page's /Contents, a regular stream from the file
  2858 + auto p1 = pdf.getAllPages().at(0);
  2859 + auto s1 = p1.getKey("/Contents");
  2860 +
  2861 + // s2: a new stream created from a string
  2862 + auto s2 = QPDFObjectHandle::newStream(&pdf, "from string");
  2863 + // Give s2's dictionary a direct value and an indirect reference
  2864 + s2.getDict().replaceKey(
  2865 + "/Stuff",
  2866 + QPDFObjectHandle::parse(
  2867 + &pdf,
  2868 + "<< /Direct 3 /Indirect " +
  2869 + pdf.makeIndirectObject(
  2870 + QPDFObjectHandle::newInteger(16059)).unparse() + ">>"));
  2871 + s2.getDict().replaceKey(
  2872 + "/Other", QPDFObjectHandle::newString("other stuff"));
  2873 +
  2874 + // s3: a new stream created from a Buffer filled through Pl_Buffer
  2875 + Pl_Buffer b("buffer");
  2876 + b.write(QUtil::unsigned_char_pointer("from buffer"), 11);
  2877 + b.finish();
  2878 + PointerHolder<Buffer> bp = b.getBuffer();
  2879 + auto s3 = QPDFObjectHandle::newStream(&pdf, bp);
  2880 +
  2881 + std::vector<QPDFObjectHandle> streams = {s1, s2, s3};
  2882 + pdf.getTrailer().replaceKey(
  2883 + "/Originals", QPDFObjectHandle::newArray(streams));
  2884 +
  2885 + int i = 0;
  2886 + for (auto orig: streams)
  2887 + {
  2888 + ++i;
  2889 + auto istr = QUtil::int_to_string(i);
  2890 + auto orig_data = orig.getStreamData();
  2891 + auto copy = orig.copyStream();
  2892 + copy.getDict().replaceKey(
  2893 + "/Other", QPDFObjectHandle::newString("other: " + istr));
  2894 + orig.replaceStreamData("something new " + istr, null, null);
  2895 + auto copy_data = copy.getStreamData();
  2896 + assert(orig_data->getSize() == copy_data->getSize());
  2897 + assert(memcmp(orig_data->getBuffer(),
  2898 + copy_data->getBuffer(),
  2899 + orig_data->getSize()) == 0);
  2900 + copies.appendItem(copy);
  2901 + }
  2902 +
  2903 + QPDFWriter w(pdf, "a.pdf");
  2904 + w.setStaticID(true);
  2905 + w.setQDFMode(true);
  2906 + w.write();
  2907 + }
2847 else 2908 else
2848 { 2909 {
2849 throw std::runtime_error(std::string("invalid test ") + 2910 throw std::runtime_error(std::string("invalid test ") +