Commit 3d9bac43da5937235c962a53e68475f796c370aa
1 parent
f07284da
Add internal Pl_Base64
Bidirectional base64; will be used by JSON v2.
Showing
17 changed files
with
371 additions
and
1 deletions
TODO
libqpdf/CMakeLists.txt
libqpdf/Pl_Base64.cc
0 → 100644
| 1 | +#include <qpdf/Pl_Base64.hh> | |
| 2 | + | |
| 3 | +#include <qpdf/QIntC.hh> | |
| 4 | +#include <qpdf/QUtil.hh> | |
| 5 | +#include <algorithm> | |
| 6 | +#include <cstring> | |
| 7 | +#include <stdexcept> | |
| 8 | + | |
// Convert an integral character code to a plain char so it can be
// compared against character literals.
static char
to_c(unsigned int ch)
{
    char const as_char = static_cast<char>(ch);
    return as_char;
}
| 14 | + | |
// Convert an int to unsigned char; values outside the unsigned char
// range are reduced by the usual unsigned conversion.
static unsigned char
to_uc(int ch)
{
    unsigned char const as_byte = static_cast<unsigned char>(ch);
    return as_byte;
}
| 20 | + | |
// Widen a small integral value to int. The argument has already been
// converted to int at the call site, so it is returned unchanged.
static int
to_i(int i)
{
    return i;
}
| 26 | + | |
| 27 | +Pl_Base64::Pl_Base64(char const* identifier, Pipeline* next, action_e action) : | |
| 28 | + Pipeline(identifier, next), | |
| 29 | + action(action), | |
| 30 | + pos(0), | |
| 31 | + end_of_data(false), | |
| 32 | + finished(false) | |
| 33 | +{ | |
| 34 | + reset(); | |
| 35 | +} | |
| 36 | + | |
| 37 | +void | |
| 38 | +Pl_Base64::write(unsigned char* data, size_t len) | |
| 39 | +{ | |
| 40 | + if (finished) { | |
| 41 | + throw std::logic_error("Pl_Base64 used after finished"); | |
| 42 | + } | |
| 43 | + if (this->action == a_decode) { | |
| 44 | + decode(data, len); | |
| 45 | + } else { | |
| 46 | + encode(data, len); | |
| 47 | + } | |
| 48 | +} | |
| 49 | + | |
| 50 | +void | |
| 51 | +Pl_Base64::decode(unsigned char* data, size_t len) | |
| 52 | +{ | |
| 53 | + unsigned char* p = data; | |
| 54 | + while (len > 0) { | |
| 55 | + if (!QUtil::is_space(to_c(*p))) { | |
| 56 | + this->buf[this->pos++] = *p; | |
| 57 | + if (this->pos == 4) { | |
| 58 | + flush(); | |
| 59 | + } | |
| 60 | + } | |
| 61 | + ++p; | |
| 62 | + --len; | |
| 63 | + } | |
| 64 | +} | |
| 65 | + | |
| 66 | +void | |
| 67 | +Pl_Base64::encode(unsigned char* data, size_t len) | |
| 68 | +{ | |
| 69 | + unsigned char* p = data; | |
| 70 | + while (len > 0) { | |
| 71 | + this->buf[this->pos++] = *p; | |
| 72 | + if (this->pos == 3) { | |
| 73 | + flush(); | |
| 74 | + } | |
| 75 | + ++p; | |
| 76 | + --len; | |
| 77 | + } | |
| 78 | +} | |
| 79 | + | |
| 80 | +void | |
| 81 | +Pl_Base64::flush() | |
| 82 | +{ | |
| 83 | + if (this->action == a_decode) { | |
| 84 | + flush_decode(); | |
| 85 | + } else { | |
| 86 | + flush_encode(); | |
| 87 | + } | |
| 88 | + reset(); | |
| 89 | +} | |
| 90 | + | |
| 91 | +void | |
| 92 | +Pl_Base64::flush_decode() | |
| 93 | +{ | |
| 94 | + if (this->end_of_data) { | |
| 95 | + throw std::runtime_error( | |
| 96 | + getIdentifier() + ": base64 decode: data follows pad characters"); | |
| 97 | + } | |
| 98 | + int pad = 0; | |
| 99 | + int shift = 18; | |
| 100 | + int outval = 0; | |
| 101 | + for (size_t i = 0; i < 4; ++i) { | |
| 102 | + int v = 0; | |
| 103 | + char ch = to_c(this->buf[i]); | |
| 104 | + if ((ch >= 'A') && (ch <= 'Z')) { | |
| 105 | + v = ch - 'A'; | |
| 106 | + } else if ((ch >= 'a') && (ch <= 'z')) { | |
| 107 | + v = ch - 'a' + 26; | |
| 108 | + } else if ((ch >= '0') && (ch <= '9')) { | |
| 109 | + v = ch - '0' + 52; | |
| 110 | + } else if ((ch == '+') || (ch == '-')) { | |
| 111 | + v = 62; | |
| 112 | + } else if ((ch == '/') || (ch == '_')) { | |
| 113 | + v = 63; | |
| 114 | + } else if ( | |
| 115 | + (ch == '=') && ((i == 3) || ((i == 2) && (this->buf[3] == '=')))) { | |
| 116 | + ++pad; | |
| 117 | + this->end_of_data = true; | |
| 118 | + v = 0; | |
| 119 | + } else { | |
| 120 | + throw std::runtime_error( | |
| 121 | + getIdentifier() + ": base64 decode: invalid input"); | |
| 122 | + } | |
| 123 | + outval |= v << shift; | |
| 124 | + shift -= 6; | |
| 125 | + } | |
| 126 | + unsigned char out[3] = { | |
| 127 | + to_uc(outval >> 16), | |
| 128 | + to_uc(0xff & (outval >> 8)), | |
| 129 | + to_uc(0xff & outval), | |
| 130 | + }; | |
| 131 | + | |
| 132 | + getNext()->write(out, QIntC::to_size(3 - pad)); | |
| 133 | +} | |
| 134 | + | |
| 135 | +void | |
| 136 | +Pl_Base64::flush_encode() | |
| 137 | +{ | |
| 138 | + int outval = ((this->buf[0] << 16) | (this->buf[1] << 8) | (this->buf[2])); | |
| 139 | + unsigned char out[4] = { | |
| 140 | + to_uc(outval >> 18), | |
| 141 | + to_uc(0x3f & (outval >> 12)), | |
| 142 | + to_uc(0x3f & (outval >> 6)), | |
| 143 | + to_uc(0x3f & outval), | |
| 144 | + }; | |
| 145 | + for (size_t i = 0; i < 4; ++i) { | |
| 146 | + int ch = to_i(out[i]); | |
| 147 | + if (ch < 26) { | |
| 148 | + ch += 'A'; | |
| 149 | + } else if (ch < 52) { | |
| 150 | + ch -= 26; | |
| 151 | + ch += 'a'; | |
| 152 | + } else if (ch < 62) { | |
| 153 | + ch -= 52; | |
| 154 | + ch += '0'; | |
| 155 | + } else if (ch == 62) { | |
| 156 | + ch = '+'; | |
| 157 | + } else if (ch == 63) { | |
| 158 | + ch = '/'; | |
| 159 | + } | |
| 160 | + out[i] = to_uc(ch); | |
| 161 | + } | |
| 162 | + for (size_t i = 0; i < 3 - this->pos; ++i) { | |
| 163 | + out[3 - i] = '='; | |
| 164 | + } | |
| 165 | + getNext()->write(out, 4); | |
| 166 | +} | |
| 167 | + | |
| 168 | +void | |
| 169 | +Pl_Base64::finish() | |
| 170 | +{ | |
| 171 | + if (this->pos > 0) { | |
| 172 | + if (finished) { | |
| 173 | + throw std::logic_error("Pl_Base64 used after finished"); | |
| 174 | + } | |
| 175 | + if (this->action == a_decode) { | |
| 176 | + for (size_t i = this->pos; i < 4; ++i) { | |
| 177 | + this->buf[i] = '='; | |
| 178 | + } | |
| 179 | + } | |
| 180 | + flush(); | |
| 181 | + } | |
| 182 | + this->finished = true; | |
| 183 | + getNext()->finish(); | |
| 184 | +} | |
| 185 | + | |
| 186 | +void | |
| 187 | +Pl_Base64::reset() | |
| 188 | +{ | |
| 189 | + this->pos = 0; | |
| 190 | + memset(buf, 0, 4); | |
| 191 | +} | ... | ... |
libqpdf/qpdf/Pl_Base64.hh
0 → 100644
| 1 | +#ifndef PL_BASE64_HH | |
| 2 | +#define PL_BASE64_HH | |
| 3 | + | |
| 4 | +#include <qpdf/Pipeline.hh> | |
| 5 | + | |
| 6 | +class Pl_Base64: public Pipeline | |
| 7 | +{ | |
| 8 | + public: | |
| 9 | + enum action_e { a_encode, a_decode }; | |
| 10 | + Pl_Base64(char const* identifier, Pipeline* next, action_e); | |
| 11 | + virtual ~Pl_Base64() = default; | |
| 12 | + virtual void write(unsigned char* buf, size_t len) override; | |
| 13 | + virtual void finish() override; | |
| 14 | + | |
| 15 | + private: | |
| 16 | + void decode(unsigned char* buf, size_t len); | |
| 17 | + void encode(unsigned char* buf, size_t len); | |
| 18 | + void flush(); | |
| 19 | + void flush_decode(); | |
| 20 | + void flush_encode(); | |
| 21 | + void reset(); | |
| 22 | + | |
| 23 | + action_e action; | |
| 24 | + unsigned char buf[4]; | |
| 25 | + size_t pos; | |
| 26 | + bool end_of_data; | |
| 27 | + bool finished; | |
| 28 | +}; | |
| 29 | + | |
| 30 | +#endif // PL_BASE64_HH | ... | ... |
libtests/CMakeLists.txt
libtests/base64.cc
0 → 100644
| 1 | +#include <qpdf/Pl_Base64.hh> | |
| 2 | + | |
| 3 | +#include <qpdf/Pl_StdioFile.hh> | |
| 4 | +#include <qpdf/QUtil.hh> | |
| 5 | +#include <cassert> | |
| 6 | +#include <cstdlib> | |
| 7 | +#include <cstring> | |
| 8 | +#include <iostream> | |
| 9 | +#include <stdexcept> | |
| 10 | + | |
| 11 | +static bool | |
| 12 | +write_some(FILE* f, size_t bytes, Pipeline* p) | |
| 13 | +{ | |
| 14 | + unsigned char buf[1000]; | |
| 15 | + assert(bytes <= sizeof(buf)); | |
| 16 | + size_t len = fread(buf, 1, bytes, f); | |
| 17 | + if (len > 0) { | |
| 18 | + p->write(buf, len); | |
| 19 | + } | |
| 20 | + if (len < bytes) { | |
| 21 | + if (ferror(f)) { | |
| 22 | + std::cerr << "error reading file" << std::endl; | |
| 23 | + exit(2); | |
| 24 | + } | |
| 25 | + p->finish(); | |
| 26 | + return false; | |
| 27 | + } | |
| 28 | + return (len == bytes); | |
| 29 | +} | |
| 30 | + | |
// Print the command-line synopsis to standard error and exit with
// status 2.
static void
usage()
{
    char const* const synopsis = "Usage: base64 encode|decode";
    std::cerr << synopsis << std::endl;
    exit(2);
}
| 37 | + | |
| 38 | +int | |
| 39 | +main(int argc, char* argv[]) | |
| 40 | +{ | |
| 41 | + if (argc != 2) { | |
| 42 | + usage(); | |
| 43 | + } | |
| 44 | + QUtil::binary_stdout(); | |
| 45 | + QUtil::binary_stdin(); | |
| 46 | + Pl_Base64::action_e action = Pl_Base64::a_decode; | |
| 47 | + if (strcmp(argv[1], "encode") == 0) { | |
| 48 | + action = Pl_Base64::a_encode; | |
| 49 | + } else if (strcmp(argv[1], "decode") != 0) { | |
| 50 | + usage(); | |
| 51 | + } | |
| 52 | + | |
| 53 | + try { | |
| 54 | + Pl_StdioFile out("stdout", stdout); | |
| 55 | + Pl_Base64 decode("decode", &out, action); | |
| 56 | + // The comments are "n: n%4 n%3", where n is the number of | |
| 57 | + // bytes read at the end of the call, and are there to | |
| 58 | + // indicate that we are reading in chunks that exercise | |
| 59 | + // various boundary conditions around subsequent writes and | |
| 60 | + // the state of buf and pos. There are some writes that don't | |
| 61 | + // do flush at all, some that call flush multiple times, and | |
| 62 | + // some that start in the middle and do flush, and this is | |
| 63 | + // true for both encode and decode. | |
| 64 | + if (write_some(stdin, 1, &decode) && // 1: 1 1 | |
| 65 | + write_some(stdin, 4, &decode) && // 5: 1 2 | |
| 66 | + write_some(stdin, 2, &decode) && // 7: 3 1 | |
| 67 | + write_some(stdin, 2, &decode) && // 9: 1 0 | |
| 68 | + write_some(stdin, 7, &decode) && // 16: 0 1 | |
| 69 | + write_some(stdin, 1, &decode) && // 17: 1 2 | |
| 70 | + write_some(stdin, 9, &decode) && // 26: 2 2 | |
| 71 | + write_some(stdin, 2, &decode)) { // 28: 0 1 | |
| 72 | + while (write_some(stdin, 1000, &decode)) { | |
| 73 | + } | |
| 74 | + } | |
| 75 | + } catch (std::exception& e) { | |
| 76 | + std::cout << "exception: " << e.what() << std::endl; | |
| 77 | + exit(2); | |
| 78 | + } | |
| 79 | + | |
| 80 | + return 0; | |
| 81 | +} | ... | ... |
libtests/qtest/base64.test
0 → 100644
#!/usr/bin/env perl
# Test suite for the base64 test program: round-trips the fixture
# files through encode and decode and checks handling of alternate
# alphabets, padding, and invalid input.
require 5.008;
use warnings;
use strict;

chdir("base64") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('base64');

cleanup();

# ** Do not use normalize newlines on these tests. **

# Each fixture pair N.dec / N.enc must convert in both directions.
my $n = 5;
for (my $i = 1; $i <= $n; ++$i)
{
    $td->runtest("encode $i",
                 {$td->COMMAND => "base64 encode < $i.dec"},
                 {$td->FILE => "$i.enc", $td->EXIT_STATUS => 0});
    $td->runtest("decode $i",
                 {$td->COMMAND => "base64 decode < $i.enc"},
                 {$td->FILE => "$i.dec", $td->EXIT_STATUS => 0});
}

$td->runtest("non-zero discard bits",
             {$td->COMMAND => "echo c2FsYWR= | base64 decode"},
             {$td->STRING => "salad", $td->EXIT_STATUS => 0});

# The standard and URL-safe alphabets must decode to the same bytes;
# the outputs are saved to files a and b and then compared.
$td->runtest("write with +/",
             {$td->COMMAND => "echo +/== | base64 decode > a"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("write with -_",
             {$td->COMMAND => "echo -_== | base64 decode > b"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("interchangeability of +/ and -_",
             {$td->FILE => "a"},
             {$td->FILE => "b"});

$td->runtest("invalid characters",
             {$td->COMMAND => "echo aaaaa! | base64 decode"},
             {$td->REGEXP => ".*invalid input.*", $td->EXIT_STATUS => 2});
$td->runtest("invalid pad",
             {$td->COMMAND => "echo a= | base64 decode"},
             {$td->REGEXP => ".*invalid input.*", $td->EXIT_STATUS => 2});
$td->runtest("data after pad",
             {$td->COMMAND => "echo aa==potato | base64 decode"},
             {$td->REGEXP => ".*data follows pad characters.*",
              $td->EXIT_STATUS => 2});

cleanup();

# Seven fixed tests plus an encode and a decode test per fixture.
$td->report(7 + (2 * $n));

# Remove the scratch files written by the alphabet tests.
sub cleanup
{
    unlink('a', 'b');
}
libtests/qtest/base64/1.dec
0 → 100644
libtests/qtest/base64/1.enc
0 → 100644
libtests/qtest/base64/2.dec
0 → 100644
libtests/qtest/base64/2.enc
0 → 100644
libtests/qtest/base64/3.dec
0 → 100644
| 1 | +This file has a multiple of four bytes and is longer than four bytes... | ... | ... |
libtests/qtest/base64/3.enc
0 → 100644
libtests/qtest/base64/4.dec
0 → 100644
| 1 | +This file has a non-multiple of four bytes and is longer than four bytes. | ... | ... |
libtests/qtest/base64/4.enc
0 → 100644
libtests/qtest/base64/5.dec
0 → 100644
No preview for this file type
libtests/qtest/base64/5.enc
0 → 100644
| 1 | +VGhpcyBmaWxlIGFzIG9uZSBvZiBldmVyeSBieXRlIGluIGl0LiBOb3QgdGhhdCBpdCByZWFsbHkgbWFrZXMgYW55CmRpZmZlcmVuY2UsIGJ1dCB3aHkgbm90LgoKAAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7/P3+/wo= | |
| 0 | 2 | \ No newline at end of file | ... | ... |