Commit 3d9bac43da5937235c962a53e68475f796c370aa

Authored by Jay Berkenbilt
1 parent f07284da

Add internal Pl_Base64

Bidirectional base64; will be used by JSON v2.
... ... @@ -45,7 +45,6 @@ notes from 5/2:
45 45 Need new pipelines:
46 46 * Pl_OStream(std::ostream) with semantics like Pl_StdioFile
47 47 * Pl_String to std::string with semantics like Pl_Buffer
48   -* Pl_Base64
49 48  
50 49 New Pipeline methods:
51 50 * writeString(std::string const&)
... ...
libqpdf/CMakeLists.txt
... ... @@ -35,6 +35,7 @@ set(libqpdf_SOURCES
35 35 Pl_AES_PDF.cc
36 36 Pl_ASCII85Decoder.cc
37 37 Pl_ASCIIHexDecoder.cc
  38 + Pl_Base64.cc
38 39 Pl_Buffer.cc
39 40 Pl_Concatenate.cc
40 41 Pl_Count.cc
... ...
libqpdf/Pl_Base64.cc 0 → 100644
  1 +#include <qpdf/Pl_Base64.hh>
  2 +
  3 +#include <qpdf/QIntC.hh>
  4 +#include <qpdf/QUtil.hh>
  5 +#include <algorithm>
  6 +#include <cstring>
  7 +#include <stdexcept>
  8 +
  9 +static char
  10 +to_c(unsigned int ch)
  11 +{
  12 + return static_cast<char>(ch);
  13 +}
  14 +
  15 +static unsigned char
  16 +to_uc(int ch)
  17 +{
  18 + return static_cast<unsigned char>(ch);
  19 +}
  20 +
  21 +static int
  22 +to_i(int i)
  23 +{
  24 + return static_cast<int>(i);
  25 +}
  26 +
  27 +Pl_Base64::Pl_Base64(char const* identifier, Pipeline* next, action_e action) :
  28 + Pipeline(identifier, next), // identifier and next are handed to the base class
  29 + action(action), // a_encode or a_decode; selects the path taken in write()
  30 + pos(0),
  31 + end_of_data(false), // set once flush_decode() sees '=' padding
  32 + finished(false) // set by finish(); write() throws afterwards
  33 +{
  34 + reset(); // zero buf and pos
  35 +}
  36 +
  37 +void
  38 +Pl_Base64::write(unsigned char* data, size_t len)
  39 +{
  40 + if (finished) {
  41 + throw std::logic_error("Pl_Base64 used after finished");
  42 + }
  43 + if (this->action == a_decode) {
  44 + decode(data, len);
  45 + } else {
  46 + encode(data, len);
  47 + }
  48 +}
  49 +
  50 +void
  51 +Pl_Base64::decode(unsigned char* data, size_t len)
  52 +{
  53 + unsigned char* p = data;
  54 + while (len > 0) {
  55 + if (!QUtil::is_space(to_c(*p))) {
  56 + this->buf[this->pos++] = *p;
  57 + if (this->pos == 4) {
  58 + flush();
  59 + }
  60 + }
  61 + ++p;
  62 + --len;
  63 + }
  64 +}
  65 +
  66 +void
  67 +Pl_Base64::encode(unsigned char* data, size_t len)
  68 +{
  69 + unsigned char* p = data;
  70 + while (len > 0) {
  71 + this->buf[this->pos++] = *p;
  72 + if (this->pos == 3) {
  73 + flush();
  74 + }
  75 + ++p;
  76 + --len;
  77 + }
  78 +}
  79 +
  80 +void
  81 +Pl_Base64::flush()
  82 +{
  83 + if (this->action == a_decode) {
  84 + flush_decode();
  85 + } else {
  86 + flush_encode();
  87 + }
  88 + reset();
  89 +}
  90 +
  91 +void
  92 +Pl_Base64::flush_decode() // decode the 4 chars in buf into 3 - pad bytes
  93 +{
  94 + if (this->end_of_data) { // an earlier group held '=', so this is trailing data
  95 + throw std::runtime_error(
  96 + getIdentifier() + ": base64 decode: data follows pad characters");
  97 + }
  98 + int pad = 0; // number of '=' characters accepted in this group
  99 + int shift = 18; // bit offset of the current 6-bit value within outval
  100 + int outval = 0; // the group's 24 bits, first character in the top bits
  101 + for (size_t i = 0; i < 4; ++i) {
  102 + int v = 0;
  103 + char ch = to_c(this->buf[i]);
  104 + if ((ch >= 'A') && (ch <= 'Z')) {
  105 + v = ch - 'A';
  106 + } else if ((ch >= 'a') && (ch <= 'z')) {
  107 + v = ch - 'a' + 26;
  108 + } else if ((ch >= '0') && (ch <= '9')) {
  109 + v = ch - '0' + 52;
  110 + } else if ((ch == '+') || (ch == '-')) { // '-' is accepted as an alias of '+'
  111 + v = 62;
  112 + } else if ((ch == '/') || (ch == '_')) { // '_' is accepted as an alias of '/'
  113 + v = 63;
  114 + } else if ( // '=' is valid only as the last character or the last two
  115 + (ch == '=') && ((i == 3) || ((i == 2) && (this->buf[3] == '=')))) {
  116 + ++pad;
  117 + this->end_of_data = true; // any later flush_decode() call will throw
  118 + v = 0;
  119 + } else {
  120 + throw std::runtime_error(
  121 + getIdentifier() + ": base64 decode: invalid input");
  122 + }
  123 + outval |= v << shift;
  124 + shift -= 6;
  125 + }
  126 + unsigned char out[3] = { // split the 24 bits back into three bytes
  127 + to_uc(outval >> 16),
  128 + to_uc(0xff & (outval >> 8)),
  129 + to_uc(0xff & outval),
  130 + };
  131 +
  132 + getNext()->write(out, QIntC::to_size(3 - pad)); // each '=' drops one byte
  133 +}
  134 +
  135 +void
  136 +Pl_Base64::flush_encode()
  137 +{
  138 + int outval = ((this->buf[0] << 16) | (this->buf[1] << 8) | (this->buf[2]));
  139 + unsigned char out[4] = {
  140 + to_uc(outval >> 18),
  141 + to_uc(0x3f & (outval >> 12)),
  142 + to_uc(0x3f & (outval >> 6)),
  143 + to_uc(0x3f & outval),
  144 + };
  145 + for (size_t i = 0; i < 4; ++i) {
  146 + int ch = to_i(out[i]);
  147 + if (ch < 26) {
  148 + ch += 'A';
  149 + } else if (ch < 52) {
  150 + ch -= 26;
  151 + ch += 'a';
  152 + } else if (ch < 62) {
  153 + ch -= 52;
  154 + ch += '0';
  155 + } else if (ch == 62) {
  156 + ch = '+';
  157 + } else if (ch == 63) {
  158 + ch = '/';
  159 + }
  160 + out[i] = to_uc(ch);
  161 + }
  162 + for (size_t i = 0; i < 3 - this->pos; ++i) {
  163 + out[3 - i] = '=';
  164 + }
  165 + getNext()->write(out, 4);
  166 +}
  167 +
  168 +void
  169 +Pl_Base64::finish()
  170 +{
  171 + if (this->pos > 0) {
  172 + if (finished) {
  173 + throw std::logic_error("Pl_Base64 used after finished");
  174 + }
  175 + if (this->action == a_decode) {
  176 + for (size_t i = this->pos; i < 4; ++i) {
  177 + this->buf[i] = '=';
  178 + }
  179 + }
  180 + flush();
  181 + }
  182 + this->finished = true;
  183 + getNext()->finish();
  184 +}
  185 +
  186 +void
  187 +Pl_Base64::reset()
  188 +{
  189 + this->pos = 0;
  190 + memset(buf, 0, 4);
  191 +}
... ...
libqpdf/qpdf/Pl_Base64.hh 0 → 100644
  1 +#ifndef PL_BASE64_HH
  2 +#define PL_BASE64_HH
  3 +
  4 +#include <qpdf/Pipeline.hh>
  5 +
  6 +class Pl_Base64: public Pipeline
  7 +{
  8 + public:
  9 + enum action_e { a_encode, a_decode };
  10 + Pl_Base64(char const* identifier, Pipeline* next, action_e);
  11 + virtual ~Pl_Base64() = default;
  12 + virtual void write(unsigned char* buf, size_t len) override;
  13 + virtual void finish() override;
  14 +
  15 + private:
  16 + void decode(unsigned char* buf, size_t len);
  17 + void encode(unsigned char* buf, size_t len);
  18 + void flush();
  19 + void flush_decode();
  20 + void flush_encode();
  21 + void reset();
  22 +
  23 + action_e action;
  24 + unsigned char buf[4];
  25 + size_t pos;
  26 + bool end_of_data;
  27 + bool finished;
  28 +};
  29 +
  30 +#endif // PL_BASE64_HH
... ...
libtests/CMakeLists.txt
... ... @@ -3,6 +3,7 @@ set(TEST_PROGRAMS
3 3 aes
4 4 arg_parser
5 5 ascii85
  6 + base64
6 7 bits
7 8 buffer
8 9 closed_file_input_source
... ...
libtests/base64.cc 0 → 100644
  1 +#include <qpdf/Pl_Base64.hh>
  2 +
  3 +#include <qpdf/Pl_StdioFile.hh>
  4 +#include <qpdf/QUtil.hh>
  5 +#include <cassert>
  6 +#include <cstdlib>
  7 +#include <cstring>
  8 +#include <iostream>
  9 +#include <stdexcept>
  10 +
  11 +static bool
  12 +write_some(FILE* f, size_t bytes, Pipeline* p)
  13 +{
  14 + unsigned char buf[1000];
  15 + assert(bytes <= sizeof(buf));
  16 + size_t len = fread(buf, 1, bytes, f);
  17 + if (len > 0) {
  18 + p->write(buf, len);
  19 + }
  20 + if (len < bytes) {
  21 + if (ferror(f)) {
  22 + std::cerr << "error reading file" << std::endl;
  23 + exit(2);
  24 + }
  25 + p->finish();
  26 + return false;
  27 + }
  28 + return (len == bytes);
  29 +}
  30 +
  31 +static void
  32 +usage()
  33 +{
  34 + std::cerr << "Usage: base64 encode|decode" << std::endl;
  35 + exit(2);
  36 +}
  37 +
  38 +int
  39 +main(int argc, char* argv[])
  40 +{
  41 + if (argc != 2) {
  42 + usage();
  43 + }
  44 + QUtil::binary_stdout();
  45 + QUtil::binary_stdin();
  46 + Pl_Base64::action_e action = Pl_Base64::a_decode;
  47 + if (strcmp(argv[1], "encode") == 0) {
  48 + action = Pl_Base64::a_encode;
  49 + } else if (strcmp(argv[1], "decode") != 0) {
  50 + usage();
  51 + }
  52 +
  53 + try {
  54 + Pl_StdioFile out("stdout", stdout);
  55 + Pl_Base64 decode("decode", &out, action);
  56 + // The comments are "n: n%4 n%3", where n is the number of
  57 + // bytes read at the end of the call, and are there to
  58 + // indicate that we are reading in chunks that exercise
  59 + // various boundary conditions around subsequent writes and
  60 + // the state of buf and pos. There are some writes that don't
  61 + // do flush at all, some that call flush multiple times, and
  62 + // some that start in the middle and do flush, and this is
  63 + // true for both encode and decode.
  64 + if (write_some(stdin, 1, &decode) && // 1: 1 1
  65 + write_some(stdin, 4, &decode) && // 5: 1 2
  66 + write_some(stdin, 2, &decode) && // 7: 3 1
  67 + write_some(stdin, 2, &decode) && // 9: 1 0
  68 + write_some(stdin, 7, &decode) && // 16: 0 1
  69 + write_some(stdin, 1, &decode) && // 17: 1 2
  70 + write_some(stdin, 9, &decode) && // 26: 2 2
  71 + write_some(stdin, 2, &decode)) { // 28: 0 1
  72 + while (write_some(stdin, 1000, &decode)) {
  73 + }
  74 + }
  75 + } catch (std::exception& e) {
  76 + std::cout << "exception: " << e.what() << std::endl;
  77 + exit(2);
  78 + }
  79 +
  80 + return 0;
  81 +}
... ...
libtests/qtest/base64.test 0 → 100644
  1 +#!/usr/bin/env perl
  2 +require 5.008;
  3 +use warnings;
  4 +use strict;
  5 +
  6 +chdir("base64") or die "chdir testdir failed: $!\n";
  7 +
  8 +require TestDriver;
  9 +
  10 +my $td = new TestDriver('base64');
  11 +
  12 +cleanup();
  13 +
  14 +# ** Do not use normalize newlines on these tests. **
  15 +
  16 +my $n = 5;
  17 +for (my $i = 1; $i <= $n; ++$i)
  18 +{
  19 + $td->runtest("encode $i",
  20 + {$td->COMMAND => "base64 encode < $i.dec"},
  21 + {$td->FILE => "$i.enc", $td->EXIT_STATUS => 0});
  22 + $td->runtest("code $i",
  23 + {$td->COMMAND => "base64 decode < $i.enc"},
  24 + {$td->FILE => "$i.dec", $td->EXIT_STATUS => 0});
  25 +}
  26 +
  27 +$td->runtest("non-zero discard bits",
  28 + {$td->COMMAND => "echo c2FsYWR= | base64 decode"},
  29 + {$td->STRING => "salad", $td->EXIT_STATUS => 0});
  30 +$td->runtest("write with +/",
  31 + {$td->COMMAND => "echo +/== | base64 decode > a"},
  32 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  33 +$td->runtest("write with -_",
  34 + {$td->COMMAND => "echo -_== | base64 decode > b"},
  35 + {$td->STRING => "", $td->EXIT_STATUS => 0});
  36 +$td->runtest("interchangeability of +/ and -_",
  37 + {$td->FILE => "a"},
  38 + {$td->FILE => "b"});
  39 +
  40 +$td->runtest("invalid characters",
  41 + {$td->COMMAND => "echo aaaaa! | base64 decode"},
  42 + {$td->REGEXP => ".*invalid input.*", $td->EXIT_STATUS => 2});
  43 +$td->runtest("invalid pad",
  44 + {$td->COMMAND => "echo a= | base64 decode"},
  45 + {$td->REGEXP => ".*invalid input.*", $td->EXIT_STATUS => 2});
  46 +$td->runtest("data after pad",
  47 + {$td->COMMAND => "echo aa==potato | base64 decode"},
  48 + {$td->REGEXP => ".*data follows pad characters.*",
  49 + $td->EXIT_STATUS => 2});
  50 +
  51 +cleanup();
  52 +
  53 +$td->report(7 + (2 * $n));
  54 +
  55 +sub cleanup # remove scratch files a and b written by the +/ and -_ tests
  56 +{
  57 + unlink('a', 'b');
  58 +}
... ...
libtests/qtest/base64/1.dec 0 → 100644
  1 +123
0 2 \ No newline at end of file
... ...
libtests/qtest/base64/1.enc 0 → 100644
  1 +MTIz
0 2 \ No newline at end of file
... ...
libtests/qtest/base64/2.dec 0 → 100644
  1 +1234
0 2 \ No newline at end of file
... ...
libtests/qtest/base64/2.enc 0 → 100644
  1 +MTIzNA==
0 2 \ No newline at end of file
... ...
libtests/qtest/base64/3.dec 0 → 100644
  1 +This file has a multiple of four bytes and is longer than four bytes...
... ...
libtests/qtest/base64/3.enc 0 → 100644
  1 +VGhpcyBmaWxlIGhhcyBhIG11bHRpcGxlIG9mIGZvdXIgYnl0ZXMgYW5kIGlzIGxvbmdlciB0aGFuIGZvdXIgYnl0ZXMuLi4K
0 2 \ No newline at end of file
... ...
libtests/qtest/base64/4.dec 0 → 100644
  1 +This file has a non-multiple of four bytes and is longer than four bytes.
... ...
libtests/qtest/base64/4.enc 0 → 100644
  1 +VGhpcyBmaWxlIGhhcyBhIG5vbi1tdWx0aXBsZSBvZiBmb3VyIGJ5dGVzIGFuZCBpcyBsb25nZXIgdGhhbiBmb3VyIGJ5dGVzLgo=
0 2 \ No newline at end of file
... ...
libtests/qtest/base64/5.dec 0 → 100644
No preview for this file type
libtests/qtest/base64/5.enc 0 → 100644
  1 +VGhpcyBmaWxlIGFzIG9uZSBvZiBldmVyeSBieXRlIGluIGl0LiBOb3QgdGhhdCBpdCByZWFsbHkgbWFrZXMgYW55CmRpZmZlcmVuY2UsIGJ1dCB3aHkgbm90LgoKAAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/wo=
0 2 \ No newline at end of file
... ...