Commit 5a842792b69550cf441d4598feb1daff2fa8c83f

Authored by Masamichi Hosoda
Committed by Jay Berkenbilt
1 parent cdc46d78

Parse Contents in signature dictionary without encryption

Various PDF digital signing tools do not encrypt the /Contents value in
the signature dictionary. Adobe Acrobat Reader DC can handle a PDF in
which the /Contents value is not encrypted.

Write Contents in signature dictionary without encryption

Tests ensure that string /Contents values are not handled specially when
they appear outside of signature dictionaries.
include/qpdf/QPDFWriter.hh
... ... @@ -481,6 +481,7 @@ class QPDFWriter
481 481 static int const f_filtered = 1 << 1;
482 482 static int const f_in_ostream = 1 << 2;
483 483 static int const f_hex_string = 1 << 3;
  484 + static int const f_no_encryption = 1 << 4;
484 485  
485 486 enum trailer_e { t_normal, t_lin_first, t_lin_second };
486 487  
... ...
libqpdf/QPDFObjectHandle.cc
... ... @@ -1779,12 +1779,19 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1779 1779 bool done = false;
1780 1780 int bad_count = 0;
1781 1781 int good_count = 0;
  1782 + bool b_contents = false;
  1783 + std::vector<std::string> contents_string_stack;
  1784 + contents_string_stack.push_back("");
  1785 + std::vector<qpdf_offset_t> contents_offset_stack;
  1786 + contents_offset_stack.push_back(-1);
1782 1787 while (! done)
1783 1788 {
1784 1789 bool bad = false;
1785 1790 SparseOHArray& olist = olist_stack.back();
1786 1791 parser_state_e state = state_stack.back();
1787 1792 offset = offset_stack.back();
  1793 + std::string& contents_string = contents_string_stack.back();
  1794 + qpdf_offset_t& contents_offset = contents_offset_stack.back();
1788 1795  
1789 1796 object = QPDFObjectHandle();
1790 1797 set_offset = false;
... ... @@ -1894,6 +1901,9 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1894 1901 state_stack.push_back(
1895 1902 (token.getType() == QPDFTokenizer::tt_array_open) ?
1896 1903 st_array : st_dictionary);
  1904 + b_contents = false;
  1905 + contents_string_stack.push_back("");
  1906 + contents_offset_stack.push_back(-1);
1897 1907 }
1898 1908 break;
1899 1909  
... ... @@ -1914,7 +1924,19 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1914 1924 break;
1915 1925  
1916 1926 case QPDFTokenizer::tt_name:
1917   - object = newName(token.getValue());
  1927 + {
  1928 + std::string name = token.getValue();
  1929 + object = newName(name);
  1930 +
  1931 + if (name == "/Contents")
  1932 + {
  1933 + b_contents = true;
  1934 + }
  1935 + else
  1936 + {
  1937 + b_contents = false;
  1938 + }
  1939 + }
1918 1940 break;
1919 1941  
1920 1942 case QPDFTokenizer::tt_word:
... ... @@ -1975,6 +1997,12 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
1975 1997 std::string val = token.getValue();
1976 1998 if (decrypter)
1977 1999 {
  2000 + if (b_contents)
  2001 + {
  2002 + contents_string = val;
  2003 + contents_offset = input->getLastOffset();
  2004 + b_contents = false;
  2005 + }
1978 2006 decrypter->decryptString(val);
1979 2007 }
1980 2008 object = QPDFObjectHandle::newString(val);
... ... @@ -2168,6 +2196,18 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
2168 2196 }
2169 2197 dict[key] = val;
2170 2198 }
  2199 + if (!contents_string.empty() &&
  2200 + dict.count("/Type") &&
  2201 + dict["/Type"].isName() &&
  2202 + dict["/Type"].getName() == "/Sig" &&
  2203 + dict.count("/ByteRange") &&
  2204 + dict.count("/Contents") &&
  2205 + dict["/Contents"].isString())
  2206 + {
  2207 + dict["/Contents"]
  2208 + = QPDFObjectHandle::newString(contents_string);
  2209 + dict["/Contents"].setParsedOffset(contents_offset);
  2210 + }
2171 2211 object = newDictionary(dict);
2172 2212 setObjectDescriptionFromInput(
2173 2213 object, context, object_description, input, offset);
... ... @@ -2190,6 +2230,8 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
2190 2230 {
2191 2231 olist_stack.back().append(object);
2192 2232 }
  2233 + contents_string_stack.pop_back();
  2234 + contents_offset_stack.pop_back();
2193 2235 }
2194 2236 }
2195 2237  
... ...
libqpdf/QPDFWriter.cc
... ... @@ -1695,7 +1695,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1695 1695 {
1696 1696 QTC::TC("qpdf", "QPDFWriter no encryption sig contents");
1697 1697 unparseChild(object.getKey(key), level + 1,
1698   - child_flags | f_hex_string);
  1698 + child_flags | f_hex_string | f_no_encryption);
1699 1699 }
1700 1700 else
1701 1701 {
... ... @@ -1866,6 +1866,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
1866 1866 std::string val;
1867 1867 if (this->m->encrypted &&
1868 1868 (! (flags & f_in_ostream)) &&
  1869 + (! (flags & f_no_encryption)) &&
1869 1870 (! this->m->cur_data_key.empty()))
1870 1871 {
1871 1872 val = object.getStringValue();
... ...
qpdf/qtest/qpdf.test
... ... @@ -3999,7 +3999,6 @@ show_ntests();
3999 3999 # ----------
4000 4000 $td->notify("--- Signature Dictionary ---");
4001 4001 $n_tests += 6;
4002   -
4003 4002 foreach my $i (qw(preserve disable generate))
4004 4003 {
4005 4004 $td->runtest("sig dict contents hex (object-streams=$i)",
... ... @@ -4017,6 +4016,88 @@ foreach my $i (qw(preserve disable generate))
4017 4016 $td->EXIT_STATUS => 0});
4018 4017 }
4019 4018  
  4019 +$n_tests += 4;
  4020 +foreach my $i (qw(preserve disable))
  4021 +{
  4022 + $td->runtest("non sig dict contents text string (object-streams=$i)",
  4023 + {$td->COMMAND =>
  4024 + "qpdf --object-streams=$i comment-annotation.pdf a.pdf"},
  4025 + {$td->STRING => "",
  4026 + $td->EXIT_STATUS => 0});
  4027 + $td->runtest("find desired contents as non hex (object-streams=$i)",
  4028 + {$td->COMMAND =>
  4029 + "grep \"/Contents (Salad)\" a.pdf"},
  4030 + {$td->REGEXP => ".*",
  4031 + $td->EXIT_STATUS => 0});
  4032 +}
  4033 +
  4034 +$n_tests += 2;
  4035 + $td->runtest("non sig dict contents text string (object-streams=generate)",
  4036 + {$td->COMMAND =>
  4037 + "qpdf --object-streams=generate comment-annotation.pdf a.pdf"},
  4038 + {$td->STRING => "",
  4039 + $td->EXIT_STATUS => 0});
  4040 + $td->runtest("plain text not found due to compression (object-streams=generate)",
  4041 + {$td->COMMAND =>
  4042 + "grep \"/Contents (Salad)\" a.pdf"},
  4043 + {$td->REGEXP => ".*",
  4044 + $td->EXIT_STATUS => 1});
  4045 +
  4046 +$n_tests += 12;
  4047 +foreach my $i (qw(40 128 256))
  4048 +{
  4049 + $td->runtest("encrypt $i",
  4050 + {$td->COMMAND =>
  4051 + "qpdf --encrypt '' o $i -- digitally-signed.pdf a.pdf"},
  4052 + {$td->STRING => "",
  4053 + $td->EXIT_STATUS => 0});
  4054 + $td->runtest("find desired contents (encrypt $i)",
  4055 + {$td->COMMAND =>
  4056 + "grep -f digitally-signed-sig-dict-contents.out a.pdf"},
  4057 + {$td->REGEXP => ".*",
  4058 + $td->EXIT_STATUS => 0});
  4059 + $td->runtest("decrypt",
  4060 + {$td->COMMAND =>
  4061 + "qpdf --decrypt a.pdf b.pdf"},
  4062 + {$td->REGEXP => ".*",
  4063 + $td->EXIT_STATUS => 0});
  4064 + $td->runtest("find desired contents (decrypt $i)",
  4065 + {$td->COMMAND =>
  4066 + "grep -f digitally-signed-sig-dict-contents.out b.pdf"},
  4067 + {$td->REGEXP => ".*",
  4068 + $td->EXIT_STATUS => 0});
  4069 +}
  4070 +
  4071 +$n_tests += 15;
  4072 +foreach my $i (qw(40 128 256))
  4073 +{
  4074 + $td->runtest("non sig dict encrypt $i",
  4075 + {$td->COMMAND =>
  4076 + "qpdf --encrypt '' o $i -- comment-annotation.pdf a.pdf"},
  4077 + {$td->STRING => "",
  4078 + $td->EXIT_STATUS => 0});
  4079 + $td->runtest("plain text not found due to encryption (non sig dict encrypt $i)",
  4080 + {$td->COMMAND =>
  4081 + "grep \"/Contents (Salad)\" a.pdf"},
  4082 + {$td->REGEXP => ".*",
  4083 + $td->EXIT_STATUS => 1});
  4084 + $td->runtest("find encrypted contents (non sig dict encrypt $i)",
  4085 + {$td->COMMAND =>
  4086 + "grep \"/Contents <.*>\" a.pdf"},
  4087 + {$td->REGEXP => ".*",
  4088 + $td->EXIT_STATUS => 0});
  4089 + $td->runtest("non sig dict decrypt",
  4090 + {$td->COMMAND =>
  4091 + "qpdf --decrypt a.pdf b.pdf"},
  4092 + {$td->REGEXP => ".*",
  4093 + $td->EXIT_STATUS => 0});
  4094 + $td->runtest("find desired contents (non sig dict decrypt $i)",
  4095 + {$td->COMMAND =>
  4096 + "grep \"/Contents (Salad)\" b.pdf"},
  4097 + {$td->REGEXP => ".*",
  4098 + $td->EXIT_STATUS => 0});
  4099 +}
  4100 +
4020 4101 show_ntests();
4021 4102 # ----------
4022 4103 $td->notify("--- Get XRef Table ---");
... ...