Commit b670565abc579de5bda946b7538545aa967e6cd2

Authored by Jay Berkenbilt
1 parent 4400ce84

Convert scientific notation in JSON to fixed point (fixes #1079)

JSON accepts scientific notation, but PDF doesn't.
ChangeLog
1 2023-12-21 Jay Berkenbilt <ejb@ql.org> 1 2023-12-21 Jay Berkenbilt <ejb@ql.org>
2 2
  3 + * Fix to QPDF JSON: a floating point number that appears in
  4 + scientific notation will be converted to fixed-point notation,
  5 + rounded to six digits after the decimal point. Fixes #1079.
  6 +
3 * Fix to QPDF JSON: the syntax "n:/pdf-syntax" is now accepted as 7 * Fix to QPDF JSON: the syntax "n:/pdf-syntax" is now accepted as
4 an alternative way to represent names. This can be used for any 8 an alternative way to represent names. This can be used for any
5 name (e.g. "n:/text#2fplain"), but it is necessary when the name 9 name (e.g. "n:/text#2fplain"), but it is necessary when the name
libqpdf/QPDF_json.cc
@@ -732,6 +732,15 @@ QPDF::JSONReactor::makeObject(JSON const& value) @@ -732,6 +732,15 @@ QPDF::JSONReactor::makeObject(JSON const& value)
732 if (QUtil::is_long_long(str_v.c_str())) { 732 if (QUtil::is_long_long(str_v.c_str())) {
733 result = QPDFObjectHandle::newInteger(QUtil::string_to_ll(str_v.c_str())); 733 result = QPDFObjectHandle::newInteger(QUtil::string_to_ll(str_v.c_str()));
734 } else { 734 } else {
  735 + // JSON allows scientific notation, but PDF does not.
  736 + if (str_v.find('e') != std::string::npos || str_v.find('E') != std::string::npos) {
  737 + try {
  738 + auto v = std::stod(str_v);
  739 + str_v = QUtil::double_to_string(v);
  740 + } catch (std::exception&) {
  741 + // Keep it as it was
  742 + }
  743 + }
735 result = QPDFObjectHandle::newReal(str_v); 744 result = QPDFObjectHandle::newReal(str_v);
736 } 745 }
737 } else if (value.getString(str_v)) { 746 } else if (value.getString(str_v)) {
manual/release-notes.rst
@@ -52,6 +52,10 @@ Planned changes for future 12.x (subject to change): @@ -52,6 +52,10 @@ Planned changes for future 12.x (subject to change):
52 must be represented as ``"n:/one#a0two"`` since the single byte 52 must be represented as ``"n:/one#a0two"`` since the single byte
53 ``a0`` is not valid in JSON. 53 ``a0`` is not valid in JSON.
54 54
  55 + - QPDF JSON will convert floating-point numbers that appear in the JSON
  56 + in scientific notation to fixed-point notation since PDF doesn't
  57 + accept scientific notation.
  58 +
55 - Build Enhancements: 59 - Build Enhancements:
56 60
57 - The qpdf test suite now passes when qpdf is linked with an 61 - The qpdf test suite now passes when qpdf is linked with an
qpdf/qtest/qpdf-json.test
@@ -347,16 +347,27 @@ $td->runtest("check C API write to JSON stream", @@ -347,16 +347,27 @@ $td->runtest("check C API write to JSON stream",
347 # (using #xx) would generate invalid JSON, even though qpdf's own JSON 347 # (using #xx) would generate invalid JSON, even though qpdf's own JSON
348 # parser would accept it. Also, the JSON spec allows real numbers in 348 # parser would accept it. Also, the JSON spec allows real numbers in
349 # scientific notation, but the PDF spec does not. 349 # scientific notation, but the PDF spec does not.
350 -$n_tests += 2; 350 +$n_tests += 4;
351 $td->runtest("handle binary names", 351 $td->runtest("handle binary names",
352 {$td->COMMAND => 352 {$td->COMMAND =>
353 "qpdf --json-output weird-tokens.pdf a.json"}, 353 "qpdf --json-output weird-tokens.pdf a.json"},
354 {$td->STRING => "", $td->EXIT_STATUS => 0}); 354 {$td->STRING => "", $td->EXIT_STATUS => 0});
355 -# Round-trip is tested above. 355 +# Round-trip back to PDF is tested above.
356 $td->runtest("check json", 356 $td->runtest("check json",
357 {$td->FILE => "a.json"}, 357 {$td->FILE => "a.json"},
358 {$td->FILE => "weird-tokens.json"}, 358 {$td->FILE => "weird-tokens.json"},
359 $td->NORMALIZE_NEWLINES); 359 $td->NORMALIZE_NEWLINES);
  360 +# Make sure we can properly handle JSON with scientific notation.
  361 +$td->runtest("weird tokens round trip json",
  362 + {$td->COMMAND =>
  363 + "qpdf --json-input --json-output weird-tokens.json -"},
  364 + {$td->FILE => "weird-tokens.json", $td->EXIT_STATUS => 0},
  365 + $td->NORMALIZE_NEWLINES);
  366 +$td->runtest("weird tokens with scientific notation",
  367 + {$td->COMMAND =>
  368 + "qpdf --json-input --json-output weird-tokens-alt.json -"},
  369 + {$td->FILE => "weird-tokens.json", $td->EXIT_STATUS => 0},
  370 + $td->NORMALIZE_NEWLINES);
360 371
361 cleanup(); 372 cleanup();
362 $td->report($n_tests); 373 $td->report($n_tests);
qpdf/qtest/qpdf/weird-tokens-alt.json 0 → 100644
  1 +{
  2 + "qpdf": [
  3 + {
  4 + "jsonversion": 2,
  5 + "pdfversion": "2.0",
  6 + "pushedinheritedpageresources": false,
  7 + "calledgetallpages": false,
  8 + "maxobjectid": 6
  9 + },
  10 + {
  11 + "obj:1 0 R": {
  12 + "value": {
  13 + "/Extra": [
  14 + "u:Names with binary data",
  15 + "n:/ABCDEF+#ba#da#cc#e5",
  16 + "/ABCEDEF+π",
  17 + "n:/one+#a0two",
  18 + "n:/text#2fplain",
  19 + "u:Very small/large reals",
  20 + 1e-05,
  21 + 1e12
  22 + ],
  23 + "/Pages": "2 0 R",
  24 + "/Type": "/Catalog"
  25 + }
  26 + },
  27 + "obj:2 0 R": {
  28 + "value": {
  29 + "/Count": 1,
  30 + "/Kids": [
  31 + "3 0 R"
  32 + ],
  33 + "/Type": "/Pages"
  34 + }
  35 + },
  36 + "obj:3 0 R": {
  37 + "value": {
  38 + "/Contents": "4 0 R",
  39 + "/MediaBox": [
  40 + 0,
  41 + 0,
  42 + 612,
  43 + 792
  44 + ],
  45 + "/Parent": "2 0 R",
  46 + "/Resources": {
  47 + "/Font": {
  48 + "/F1": "6 0 R"
  49 + }
  50 + },
  51 + "/Type": "/Page"
  52 + }
  53 + },
  54 + "obj:4 0 R": {
  55 + "stream": {
  56 + "data": "QlQKICAvRjEgMjQgVGYKICA3MiA3MjAgVGQKICAoUG90YXRvKSBUagpFVAo=",
  57 + "dict": {}
  58 + }
  59 + },
  60 + "obj:5 0 R": {
  61 + "value": 44
  62 + },
  63 + "obj:6 0 R": {
  64 + "value": {
  65 + "/BaseFont": "/Helvetica",
  66 + "/Encoding": "/WinAnsiEncoding",
  67 + "/Subtype": "/Type1",
  68 + "/Type": "/Font"
  69 + }
  70 + },
  71 + "trailer": {
  72 + "value": {
  73 + "/ID": [
  74 + "b:42841c13bbf709d79a200fa1691836f8",
  75 + "b:728c020f464c3cf7e02c12605fa7d88b"
  76 + ],
  77 + "/Root": "1 0 R",
  78 + "/Size": 7
  79 + }
  80 + }
  81 + }
  82 + ]
  83 +}