Commit 27e8d4bbfffef1072043ef21725ab85eabaee63b

Authored by Jay Berkenbilt
1 parent 55e40037

tweak when we decide to use hex strings vs literal strings

git-svn-id: svn+q:///qpdf/trunk@810 71b93d88-0707-0410-a8cf-f5a4172ac649
libqpdf/QPDF_String.cc
@@ -9,6 +9,10 @@ @@ -9,6 +9,10 @@
9 #include <string.h> 9 #include <string.h>
10 10
11 // See above about ctype. 11 // See above about ctype.
  12 +static bool is_ascii_printable(unsigned char ch)
  13 +{
  14 + return ((ch >= 32) && (ch <= 126));
  15 +}
12 static bool is_iso_latin1_printable(unsigned char ch) 16 static bool is_iso_latin1_printable(unsigned char ch)
13 { 17 {
14 return (((ch >= 32) && (ch <= 126)) || (ch >= 160)); 18 return (((ch >= 32) && (ch <= 126)) || (ch >= 160));
@@ -40,12 +44,13 @@ QPDF_String::unparse(bool force_binary) @@ -40,12 +44,13 @@ QPDF_String::unparse(bool force_binary)
40 for (unsigned int i = 0; i < this->val.length(); ++i) 44 for (unsigned int i = 0; i < this->val.length(); ++i)
41 { 45 {
42 char ch = this->val[i]; 46 char ch = this->val[i];
43 - // Note: do not use locale to determine printability. The PDF  
44 - // specification accepts arbitrary binary data. Some locales  
45 - // imply multibyte characters. We'll consider something  
46 - // printable if it is printable in ISO-Latin-1. We'll code  
47 - // this manually rather than being rude and setting locale.  
48 - if ((ch == 0) || (! (is_iso_latin1_printable(ch) || 47 + // Note: do not use locale to determine printability. The
  48 + // PDF specification accepts arbitrary binary data. Some
  49 + // locales imply multibyte characters. We'll consider
  50 + // something printable if it is printable in 7-bit ASCII.
  51 + // We'll code this manually rather than being rude and
  52 + // setting locale.
  53 + if ((ch == 0) || (! (is_ascii_printable(ch) ||
49 strchr("\n\r\t\b\f", ch)))) 54 strchr("\n\r\t\b\f", ch))))
50 { 55 {
51 ++nonprintable; 56 ++nonprintable;
@@ -64,10 +69,7 @@ QPDF_String::unparse(bool force_binary) @@ -64,10 +69,7 @@ QPDF_String::unparse(bool force_binary)
64 } 69 }
65 70
66 // Use hex notation if more than 20% of the characters are not 71 // Use hex notation if more than 20% of the characters are not
67 - // printable in the current locale. Uniformly distributed random  
68 - // characters will not pass this test even with ISO-Latin-1 in  
69 - // which 76% are either printable or in the set of standard  
70 - // escaped characters. 72 + // printable in plain ASCII.
71 if (5 * nonprintable > val.length()) 73 if (5 * nonprintable > val.length())
72 { 74 {
73 use_hexstring = true; 75 use_hexstring = true;
qpdf/qtest/qpdf/encrypted1.out
No preview for this file type
qpdf/qtest/qpdf/good13.out
@@ -5,5 +5,5 @@ @@ -5,5 +5,5 @@
5 /nesting is direct 5 /nesting is direct
6 /strings is direct 6 /strings is direct
7 unparse: 7 0 R 7 unparse: 7 0 R
8 -unparseResolved: << /hex#20strings [ (Potato) <01020300040560> (AB) ] /indirect 8 0 R /nesting << /a [ 1 2 << /x (y) >> [ (z) ] ] /b << / (legal) /a [ 1 2 ] >> >> /strings [ (one) ($¢) () (\(\)) (\() (\)) (a\f\b\t\r\nb) <410042> (a\nb) (a b) ] >> 8 +unparseResolved: << /hex#20strings [ (Potato) <01020300040560> (AB) ] /indirect 8 0 R /nesting << /a [ 1 2 << /x (y) >> [ (z) ] ] /b << / (legal) /a [ 1 2 ] >> >> /strings [ (one) <24a2> () (\(\)) (\() (\)) (a\f\b\t\r\nb) <410042> (a\nb) (a b) ] >>
9 test 1 done 9 test 1 done
qpdf/qtest/qpdf/good13.qdf
@@ -40,7 +40,7 @@ endobj @@ -40,7 +40,7 @@ endobj
40 >> 40 >>
41 /strings [ 41 /strings [
42 (one) 42 (one)
43 - ($¢) 43 + <24a2>
44 () 44 ()
45 (\(\)) 45 (\(\))
46 (\() 46 (\()
@@ -129,12 +129,12 @@ xref @@ -129,12 +129,12 @@ xref
129 0000000000 65535 f 129 0000000000 65535 f
130 0000000052 00000 n 130 0000000052 00000 n
131 0000000133 00000 n 131 0000000133 00000 n
132 -0000000576 00000 n  
133 -0000000685 00000 n  
134 -0000000927 00000 n  
135 -0000001026 00000 n  
136 -0000001072 00000 n  
137 -0000001217 00000 n 132 +0000000578 00000 n
  133 +0000000687 00000 n
  134 +0000000929 00000 n
  135 +0000001028 00000 n
  136 +0000001074 00000 n
  137 +0000001219 00000 n
138 trailer << 138 trailer <<
139 /QTest 2 0 R 139 /QTest 2 0 R
140 /Root 1 0 R 140 /Root 1 0 R
@@ -142,5 +142,5 @@ trailer << @@ -142,5 +142,5 @@ trailer <<
142 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] 142 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
143 >> 143 >>
144 startxref 144 startxref
145 -1252 145 +1254
146 %%EOF 146 %%EOF
qpdf/qtest/qpdf/good14.out
@@ -12,7 +12,7 @@ three lines @@ -12,7 +12,7 @@ three lines
12 (\001B%DEF)<01> 12 (\001B%DEF)<01>
13 <8a8b> 13 <8a8b>
14 (ab) 14 (ab)
15 -<8c>(Ý) ) > 15 +<8c><dd> ) >
16 <610062> (MOO) 16 <610062> (MOO)
17 -- stream 1 -- 17 -- stream 1 --
18 This stream does end with a newline. 18 This stream does end with a newline.
qpdf/qtest/qpdf/good9.qdf
@@ -95,7 +95,7 @@ trailer << @@ -95,7 +95,7 @@ trailer <<
95 /QTest (¡Hola!) 95 /QTest (¡Hola!)
96 /Root 1 0 R 96 /Root 1 0 R
97 /Size 8 97 /Size 8
98 - /Z ('\236yÔ\005\037ãjø×¸B^Q\n\2139\224\rD|kã¢JZm:½l\231\002\\) 98 + /Z <279e79d4051fe36af8d7b8425e510a8b39940d447c6be3a24a5a6d3abd6c99025c>
99 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] 99 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
100 >> 100 >>
101 startxref 101 startxref
qpdf/qtest/qpdf/show-xref-by-id.out
1 Object is stream. Dictionary: 1 Object is stream. Dictionary:
2 -<< /DecodeParms << /Columns 4 /Predictor 12 >> /Encrypt 11 0 R /Filter /FlateDecode /ID [ (ª&\237þÂ\226±?>ø5ª¡:\n\b) (ím\023\021OµUt\bµ\026Ê9'\025\033) ] /Info 3 0 R /Length 52 /Root 1 0 R /Size 13 /Type /XRef /W [ 1 2 1 ] >> 2 +<< /DecodeParms << /Columns 4 /Predictor 12 >> /Encrypt 11 0 R /Filter /FlateDecode /ID [ <aa269ffec296b13f3ef835aaa13a0a08> <ed6d13114fb5557408b516ca3927151b> ] /Info 3 0 R /Length 52 /Root 1 0 R /Size 13 /Type /XRef /W [ 1 2 1 ] >>