Commit c9da66a018b6381eadfa3570d511b2a2341ebae8

Authored by Jay Berkenbilt
1 parent 3680922a

Incorporate sha2 code from sphlib 3.0

Changes from upstream are limited to changing #include paths so that I
can place header files and included "c" files in a subdirectory.  I
didn't keep the unit tests from sphlib but instead verified them by
running them manually.  I will implement the same tests using the
Pl_SHA2 pipeline except that sphlib's sha2 implementation supports
partial bytes, which I will not exercise in qpdf or our tests.
... ... @@ -48,6 +48,33 @@ obtained from
48 48 http://www.efgh.com/software/rijndael.htm
49 49 http://www.efgh.com/software/rijndael.txt
50 50  
  51 +The embedded sha2 code comes from sphlib 3.0
  52 +
  53 + http://www.saphir2.com/sphlib/
  54 +
  55 +That code has the following license:
  56 +
  57 + Copyright (c) 2007-2011 Projet RNRT SAPHIR
  58 +
  59 + Permission is hereby granted, free of charge, to any person obtaining
  60 + a copy of this software and associated documentation files (the
  61 + "Software"), to deal in the Software without restriction, including
  62 + without limitation the rights to use, copy, modify, merge, publish,
  63 + distribute, sublicense, and/or sell copies of the Software, and to
  64 + permit persons to whom the Software is furnished to do so, subject to
  65 + the following conditions:
  66 +
  67 + The above copyright notice and this permission notice shall be included
  68 + in all copies or substantial portions of the Software.
  69 +
  70 + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  71 + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  72 + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  73 + IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  74 + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  75 + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  76 + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  77 +
51 78  
52 79 Building on UNIX/Linux
53 80 ======================
... ...
libqpdf/build.mk
... ... @@ -54,18 +54,27 @@ SRCS_libqpdf = \
54 54 libqpdf/QUtil.cc \
55 55 libqpdf/RC4.cc \
56 56 libqpdf/qpdf-c.cc \
57   - libqpdf/rijndael.cc
  57 + libqpdf/rijndael.cc \
  58 + libqpdf/sha2.c \
  59 + libqpdf/sha2big.c
58 60  
59 61 # -----
60 62  
61   -OBJS_libqpdf = $(call src_to_lobj,$(SRCS_libqpdf))
  63 +CCSRCS_libqpdf = $(filter %.cc,$(SRCS_libqpdf))
  64 +CSRCS_libqpdf = $(filter %.c,$(SRCS_libqpdf))
  65 +
  66 +CCOBJS_libqpdf = $(call src_to_lobj,$(CCSRCS_libqpdf))
  67 +COBJS_libqpdf = $(call c_src_to_lobj,$(CSRCS_libqpdf))
  68 +OBJS_libqpdf = $(CCOBJS_libqpdf) $(COBJS_libqpdf)
62 69  
63 70 ifeq ($(GENDEPS),1)
64 71 -include $(call lobj_to_dep,$(OBJS_libqpdf))
65 72 endif
66 73  
67   -$(OBJS_libqpdf): libqpdf/$(OUTPUT_DIR)/%.$(LOBJ): libqpdf/%.cc
  74 +$(CCOBJS_libqpdf): libqpdf/$(OUTPUT_DIR)/%.$(LOBJ): libqpdf/%.cc
68 75 $(call libcompile,$<,$(INCLUDES_libqpdf))
  76 +$(COBJS_libqpdf): libqpdf/$(OUTPUT_DIR)/%.$(LOBJ): libqpdf/%.c
  77 + $(call c_libcompile,$<,$(INCLUDES_libqpdf))
69 78  
70 79 # Last three arguments to makelib are CURRENT,REVISION,AGE.
71 80 #
... ...
libqpdf/sha2.c 0 → 100644
  1 +/* $Id: sha2.c 227 2010-06-16 17:28:38Z tp $ */
  2 +/*
  3 + * SHA-224 / SHA-256 implementation.
  4 + *
  5 + * ==========================(LICENSE BEGIN)============================
  6 + *
  7 + * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  8 + *
  9 + * Permission is hereby granted, free of charge, to any person obtaining
  10 + * a copy of this software and associated documentation files (the
  11 + * "Software"), to deal in the Software without restriction, including
  12 + * without limitation the rights to use, copy, modify, merge, publish,
  13 + * distribute, sublicense, and/or sell copies of the Software, and to
  14 + * permit persons to whom the Software is furnished to do so, subject to
  15 + * the following conditions:
  16 + *
  17 + * The above copyright notice and this permission notice shall be
  18 + * included in all copies or substantial portions of the Software.
  19 + *
  20 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23 + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  24 + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  25 + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  26 + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27 + *
  28 + * ===========================(LICENSE END)=============================
  29 + *
  30 + * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  31 + */
  32 +
  33 +#include <stddef.h>
  34 +#include <string.h>
  35 +
  36 +#include "sph/sph_sha2.h"
  37 +
  38 +#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_SHA2
  39 +#define SPH_SMALL_FOOTPRINT_SHA2 1
  40 +#endif
  41 +
  42 +#define CH(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
  43 +#define MAJ(X, Y, Z) (((Y) & (Z)) | (((Y) | (Z)) & (X)))
  44 +
  45 +#define ROTR SPH_ROTR32
  46 +
  47 +#define BSG2_0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
  48 +#define BSG2_1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
  49 +#define SSG2_0(x) (ROTR(x, 7) ^ ROTR(x, 18) ^ SPH_T32((x) >> 3))
  50 +#define SSG2_1(x) (ROTR(x, 17) ^ ROTR(x, 19) ^ SPH_T32((x) >> 10))
  51 +
  52 +static const sph_u32 H224[8] = {
  53 + SPH_C32(0xC1059ED8), SPH_C32(0x367CD507), SPH_C32(0x3070DD17),
  54 + SPH_C32(0xF70E5939), SPH_C32(0xFFC00B31), SPH_C32(0x68581511),
  55 + SPH_C32(0x64F98FA7), SPH_C32(0xBEFA4FA4)
  56 +};
  57 +
  58 +static const sph_u32 H256[8] = {
  59 + SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372),
  60 + SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
  61 + SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
  62 +};
  63 +
  64 +/*
  65 + * The SHA2_ROUND_BODY defines the body for a SHA-224 / SHA-256
  66 + * compression function implementation. The "in" parameter should
  67 + * evaluate, when applied to a numerical input parameter from 0 to 15,
  68 + * to an expression which yields the corresponding input block. The "r"
  69 + * parameter should evaluate to an array or pointer expression
  70 + * designating the array of 8 words which contains the input and output
  71 + * of the compression function.
  72 + */
  73 +
  74 +#if SPH_SMALL_FOOTPRINT_SHA2
  75 +
  76 +static const sph_u32 K[64] = {
  77 + SPH_C32(0x428A2F98), SPH_C32(0x71374491),
  78 + SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
  79 + SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
  80 + SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5),
  81 + SPH_C32(0xD807AA98), SPH_C32(0x12835B01),
  82 + SPH_C32(0x243185BE), SPH_C32(0x550C7DC3),
  83 + SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE),
  84 + SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174),
  85 + SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786),
  86 + SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC),
  87 + SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA),
  88 + SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA),
  89 + SPH_C32(0x983E5152), SPH_C32(0xA831C66D),
  90 + SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7),
  91 + SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147),
  92 + SPH_C32(0x06CA6351), SPH_C32(0x14292967),
  93 + SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138),
  94 + SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13),
  95 + SPH_C32(0x650A7354), SPH_C32(0x766A0ABB),
  96 + SPH_C32(0x81C2C92E), SPH_C32(0x92722C85),
  97 + SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B),
  98 + SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3),
  99 + SPH_C32(0xD192E819), SPH_C32(0xD6990624),
  100 + SPH_C32(0xF40E3585), SPH_C32(0x106AA070),
  101 + SPH_C32(0x19A4C116), SPH_C32(0x1E376C08),
  102 + SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5),
  103 + SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A),
  104 + SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3),
  105 + SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
  106 + SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
  107 + SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
  108 + SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
  109 +};
  110 +
  111 +#define SHA2_MEXP1(in, pc) do { \
  112 + W[pc] = in(pc); \
  113 + } while (0)
  114 +
  115 +#define SHA2_MEXP2(in, pc) do { \
  116 + W[(pc) & 0x0F] = SPH_T32(SSG2_1(W[((pc) - 2) & 0x0F]) \
  117 + + W[((pc) - 7) & 0x0F] \
  118 + + SSG2_0(W[((pc) - 15) & 0x0F]) + W[(pc) & 0x0F]); \
  119 + } while (0)
  120 +
  121 +#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, in, pc) do { \
  122 + sph_u32 t1, t2; \
  123 + SHA2_MEXP ## n(in, pc); \
  124 + t1 = SPH_T32(h + BSG2_1(e) + CH(e, f, g) \
  125 + + K[pcount + (pc)] + W[(pc) & 0x0F]); \
  126 + t2 = SPH_T32(BSG2_0(a) + MAJ(a, b, c)); \
  127 + d = SPH_T32(d + t1); \
  128 + h = SPH_T32(t1 + t2); \
  129 + } while (0)
  130 +
  131 +#define SHA2_STEP1(a, b, c, d, e, f, g, h, in, pc) \
  132 + SHA2_STEPn(1, a, b, c, d, e, f, g, h, in, pc)
  133 +#define SHA2_STEP2(a, b, c, d, e, f, g, h, in, pc) \
  134 + SHA2_STEPn(2, a, b, c, d, e, f, g, h, in, pc)
  135 +
  136 +#define SHA2_ROUND_BODY(in, r) do { \
  137 + sph_u32 A, B, C, D, E, F, G, H; \
  138 + sph_u32 W[16]; \
  139 + unsigned pcount; \
  140 + \
  141 + A = (r)[0]; \
  142 + B = (r)[1]; \
  143 + C = (r)[2]; \
  144 + D = (r)[3]; \
  145 + E = (r)[4]; \
  146 + F = (r)[5]; \
  147 + G = (r)[6]; \
  148 + H = (r)[7]; \
  149 + pcount = 0; \
  150 + SHA2_STEP1(A, B, C, D, E, F, G, H, in, 0); \
  151 + SHA2_STEP1(H, A, B, C, D, E, F, G, in, 1); \
  152 + SHA2_STEP1(G, H, A, B, C, D, E, F, in, 2); \
  153 + SHA2_STEP1(F, G, H, A, B, C, D, E, in, 3); \
  154 + SHA2_STEP1(E, F, G, H, A, B, C, D, in, 4); \
  155 + SHA2_STEP1(D, E, F, G, H, A, B, C, in, 5); \
  156 + SHA2_STEP1(C, D, E, F, G, H, A, B, in, 6); \
  157 + SHA2_STEP1(B, C, D, E, F, G, H, A, in, 7); \
  158 + SHA2_STEP1(A, B, C, D, E, F, G, H, in, 8); \
  159 + SHA2_STEP1(H, A, B, C, D, E, F, G, in, 9); \
  160 + SHA2_STEP1(G, H, A, B, C, D, E, F, in, 10); \
  161 + SHA2_STEP1(F, G, H, A, B, C, D, E, in, 11); \
  162 + SHA2_STEP1(E, F, G, H, A, B, C, D, in, 12); \
  163 + SHA2_STEP1(D, E, F, G, H, A, B, C, in, 13); \
  164 + SHA2_STEP1(C, D, E, F, G, H, A, B, in, 14); \
  165 + SHA2_STEP1(B, C, D, E, F, G, H, A, in, 15); \
  166 + for (pcount = 16; pcount < 64; pcount += 16) { \
  167 + SHA2_STEP2(A, B, C, D, E, F, G, H, in, 0); \
  168 + SHA2_STEP2(H, A, B, C, D, E, F, G, in, 1); \
  169 + SHA2_STEP2(G, H, A, B, C, D, E, F, in, 2); \
  170 + SHA2_STEP2(F, G, H, A, B, C, D, E, in, 3); \
  171 + SHA2_STEP2(E, F, G, H, A, B, C, D, in, 4); \
  172 + SHA2_STEP2(D, E, F, G, H, A, B, C, in, 5); \
  173 + SHA2_STEP2(C, D, E, F, G, H, A, B, in, 6); \
  174 + SHA2_STEP2(B, C, D, E, F, G, H, A, in, 7); \
  175 + SHA2_STEP2(A, B, C, D, E, F, G, H, in, 8); \
  176 + SHA2_STEP2(H, A, B, C, D, E, F, G, in, 9); \
  177 + SHA2_STEP2(G, H, A, B, C, D, E, F, in, 10); \
  178 + SHA2_STEP2(F, G, H, A, B, C, D, E, in, 11); \
  179 + SHA2_STEP2(E, F, G, H, A, B, C, D, in, 12); \
  180 + SHA2_STEP2(D, E, F, G, H, A, B, C, in, 13); \
  181 + SHA2_STEP2(C, D, E, F, G, H, A, B, in, 14); \
  182 + SHA2_STEP2(B, C, D, E, F, G, H, A, in, 15); \
  183 + } \
  184 + (r)[0] = SPH_T32((r)[0] + A); \
  185 + (r)[1] = SPH_T32((r)[1] + B); \
  186 + (r)[2] = SPH_T32((r)[2] + C); \
  187 + (r)[3] = SPH_T32((r)[3] + D); \
  188 + (r)[4] = SPH_T32((r)[4] + E); \
  189 + (r)[5] = SPH_T32((r)[5] + F); \
  190 + (r)[6] = SPH_T32((r)[6] + G); \
  191 + (r)[7] = SPH_T32((r)[7] + H); \
  192 + } while (0)
  193 +
  194 +#else
  195 +
  196 +#define SHA2_ROUND_BODY(in, r) do { \
  197 + sph_u32 A, B, C, D, E, F, G, H, T1, T2; \
  198 + sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \
  199 + sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \
  200 + \
  201 + A = (r)[0]; \
  202 + B = (r)[1]; \
  203 + C = (r)[2]; \
  204 + D = (r)[3]; \
  205 + E = (r)[4]; \
  206 + F = (r)[5]; \
  207 + G = (r)[6]; \
  208 + H = (r)[7]; \
  209 + W00 = in(0); \
  210 + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
  211 + + SPH_C32(0x428A2F98) + W00); \
  212 + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
  213 + D = SPH_T32(D + T1); \
  214 + H = SPH_T32(T1 + T2); \
  215 + W01 = in(1); \
  216 + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
  217 + + SPH_C32(0x71374491) + W01); \
  218 + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
  219 + C = SPH_T32(C + T1); \
  220 + G = SPH_T32(T1 + T2); \
  221 + W02 = in(2); \
  222 + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
  223 + + SPH_C32(0xB5C0FBCF) + W02); \
  224 + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
  225 + B = SPH_T32(B + T1); \
  226 + F = SPH_T32(T1 + T2); \
  227 + W03 = in(3); \
  228 + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
  229 + + SPH_C32(0xE9B5DBA5) + W03); \
  230 + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
  231 + A = SPH_T32(A + T1); \
  232 + E = SPH_T32(T1 + T2); \
  233 + W04 = in(4); \
  234 + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
  235 + + SPH_C32(0x3956C25B) + W04); \
  236 + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
  237 + H = SPH_T32(H + T1); \
  238 + D = SPH_T32(T1 + T2); \
  239 + W05 = in(5); \
  240 + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
  241 + + SPH_C32(0x59F111F1) + W05); \
  242 + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
  243 + G = SPH_T32(G + T1); \
  244 + C = SPH_T32(T1 + T2); \
  245 + W06 = in(6); \
  246 + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
  247 + + SPH_C32(0x923F82A4) + W06); \
  248 + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
  249 + F = SPH_T32(F + T1); \
  250 + B = SPH_T32(T1 + T2); \
  251 + W07 = in(7); \
  252 + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
  253 + + SPH_C32(0xAB1C5ED5) + W07); \
  254 + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
  255 + E = SPH_T32(E + T1); \
  256 + A = SPH_T32(T1 + T2); \
  257 + W08 = in(8); \
  258 + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
  259 + + SPH_C32(0xD807AA98) + W08); \
  260 + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
  261 + D = SPH_T32(D + T1); \
  262 + H = SPH_T32(T1 + T2); \
  263 + W09 = in(9); \
  264 + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
  265 + + SPH_C32(0x12835B01) + W09); \
  266 + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
  267 + C = SPH_T32(C + T1); \
  268 + G = SPH_T32(T1 + T2); \
  269 + W10 = in(10); \
  270 + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
  271 + + SPH_C32(0x243185BE) + W10); \
  272 + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
  273 + B = SPH_T32(B + T1); \
  274 + F = SPH_T32(T1 + T2); \
  275 + W11 = in(11); \
  276 + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
  277 + + SPH_C32(0x550C7DC3) + W11); \
  278 + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
  279 + A = SPH_T32(A + T1); \
  280 + E = SPH_T32(T1 + T2); \
  281 + W12 = in(12); \
  282 + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
  283 + + SPH_C32(0x72BE5D74) + W12); \
  284 + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
  285 + H = SPH_T32(H + T1); \
  286 + D = SPH_T32(T1 + T2); \
  287 + W13 = in(13); \
  288 + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
  289 + + SPH_C32(0x80DEB1FE) + W13); \
  290 + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
  291 + G = SPH_T32(G + T1); \
  292 + C = SPH_T32(T1 + T2); \
  293 + W14 = in(14); \
  294 + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
  295 + + SPH_C32(0x9BDC06A7) + W14); \
  296 + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
  297 + F = SPH_T32(F + T1); \
  298 + B = SPH_T32(T1 + T2); \
  299 + W15 = in(15); \
  300 + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
  301 + + SPH_C32(0xC19BF174) + W15); \
  302 + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
  303 + E = SPH_T32(E + T1); \
  304 + A = SPH_T32(T1 + T2); \
  305 + W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
  306 + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
  307 + + SPH_C32(0xE49B69C1) + W00); \
  308 + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
  309 + D = SPH_T32(D + T1); \
  310 + H = SPH_T32(T1 + T2); \
  311 + W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
  312 + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
  313 + + SPH_C32(0xEFBE4786) + W01); \
  314 + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
  315 + C = SPH_T32(C + T1); \
  316 + G = SPH_T32(T1 + T2); \
  317 + W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
  318 + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
  319 + + SPH_C32(0x0FC19DC6) + W02); \
  320 + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
  321 + B = SPH_T32(B + T1); \
  322 + F = SPH_T32(T1 + T2); \
  323 + W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
  324 + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
  325 + + SPH_C32(0x240CA1CC) + W03); \
  326 + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
  327 + A = SPH_T32(A + T1); \
  328 + E = SPH_T32(T1 + T2); \
  329 + W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
  330 + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
  331 + + SPH_C32(0x2DE92C6F) + W04); \
  332 + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
  333 + H = SPH_T32(H + T1); \
  334 + D = SPH_T32(T1 + T2); \
  335 + W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
  336 + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
  337 + + SPH_C32(0x4A7484AA) + W05); \
  338 + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
  339 + G = SPH_T32(G + T1); \
  340 + C = SPH_T32(T1 + T2); \
  341 + W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
  342 + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
  343 + + SPH_C32(0x5CB0A9DC) + W06); \
  344 + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
  345 + F = SPH_T32(F + T1); \
  346 + B = SPH_T32(T1 + T2); \
  347 + W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
  348 + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
  349 + + SPH_C32(0x76F988DA) + W07); \
  350 + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
  351 + E = SPH_T32(E + T1); \
  352 + A = SPH_T32(T1 + T2); \
  353 + W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
  354 + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
  355 + + SPH_C32(0x983E5152) + W08); \
  356 + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
  357 + D = SPH_T32(D + T1); \
  358 + H = SPH_T32(T1 + T2); \
  359 + W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
  360 + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
  361 + + SPH_C32(0xA831C66D) + W09); \
  362 + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
  363 + C = SPH_T32(C + T1); \
  364 + G = SPH_T32(T1 + T2); \
  365 + W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
  366 + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
  367 + + SPH_C32(0xB00327C8) + W10); \
  368 + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
  369 + B = SPH_T32(B + T1); \
  370 + F = SPH_T32(T1 + T2); \
  371 + W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
  372 + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
  373 + + SPH_C32(0xBF597FC7) + W11); \
  374 + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
  375 + A = SPH_T32(A + T1); \
  376 + E = SPH_T32(T1 + T2); \
  377 + W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
  378 + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
  379 + + SPH_C32(0xC6E00BF3) + W12); \
  380 + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
  381 + H = SPH_T32(H + T1); \
  382 + D = SPH_T32(T1 + T2); \
  383 + W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
  384 + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
  385 + + SPH_C32(0xD5A79147) + W13); \
  386 + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
  387 + G = SPH_T32(G + T1); \
  388 + C = SPH_T32(T1 + T2); \
  389 + W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
  390 + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
  391 + + SPH_C32(0x06CA6351) + W14); \
  392 + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
  393 + F = SPH_T32(F + T1); \
  394 + B = SPH_T32(T1 + T2); \
  395 + W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
  396 + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
  397 + + SPH_C32(0x14292967) + W15); \
  398 + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
  399 + E = SPH_T32(E + T1); \
  400 + A = SPH_T32(T1 + T2); \
  401 + W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
  402 + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
  403 + + SPH_C32(0x27B70A85) + W00); \
  404 + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
  405 + D = SPH_T32(D + T1); \
  406 + H = SPH_T32(T1 + T2); \
  407 + W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
  408 + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
  409 + + SPH_C32(0x2E1B2138) + W01); \
  410 + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
  411 + C = SPH_T32(C + T1); \
  412 + G = SPH_T32(T1 + T2); \
  413 + W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
  414 + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
  415 + + SPH_C32(0x4D2C6DFC) + W02); \
  416 + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
  417 + B = SPH_T32(B + T1); \
  418 + F = SPH_T32(T1 + T2); \
  419 + W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
  420 + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
  421 + + SPH_C32(0x53380D13) + W03); \
  422 + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
  423 + A = SPH_T32(A + T1); \
  424 + E = SPH_T32(T1 + T2); \
  425 + W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
  426 + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
  427 + + SPH_C32(0x650A7354) + W04); \
  428 + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
  429 + H = SPH_T32(H + T1); \
  430 + D = SPH_T32(T1 + T2); \
  431 + W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
  432 + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
  433 + + SPH_C32(0x766A0ABB) + W05); \
  434 + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
  435 + G = SPH_T32(G + T1); \
  436 + C = SPH_T32(T1 + T2); \
  437 + W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
  438 + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
  439 + + SPH_C32(0x81C2C92E) + W06); \
  440 + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
  441 + F = SPH_T32(F + T1); \
  442 + B = SPH_T32(T1 + T2); \
  443 + W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
  444 + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
  445 + + SPH_C32(0x92722C85) + W07); \
  446 + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
  447 + E = SPH_T32(E + T1); \
  448 + A = SPH_T32(T1 + T2); \
  449 + W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
  450 + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
  451 + + SPH_C32(0xA2BFE8A1) + W08); \
  452 + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
  453 + D = SPH_T32(D + T1); \
  454 + H = SPH_T32(T1 + T2); \
  455 + W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
  456 + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
  457 + + SPH_C32(0xA81A664B) + W09); \
  458 + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
  459 + C = SPH_T32(C + T1); \
  460 + G = SPH_T32(T1 + T2); \
  461 + W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
  462 + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
  463 + + SPH_C32(0xC24B8B70) + W10); \
  464 + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
  465 + B = SPH_T32(B + T1); \
  466 + F = SPH_T32(T1 + T2); \
  467 + W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
  468 + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
  469 + + SPH_C32(0xC76C51A3) + W11); \
  470 + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
  471 + A = SPH_T32(A + T1); \
  472 + E = SPH_T32(T1 + T2); \
  473 + W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
  474 + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
  475 + + SPH_C32(0xD192E819) + W12); \
  476 + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
  477 + H = SPH_T32(H + T1); \
  478 + D = SPH_T32(T1 + T2); \
  479 + W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
  480 + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
  481 + + SPH_C32(0xD6990624) + W13); \
  482 + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
  483 + G = SPH_T32(G + T1); \
  484 + C = SPH_T32(T1 + T2); \
  485 + W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
  486 + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
  487 + + SPH_C32(0xF40E3585) + W14); \
  488 + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
  489 + F = SPH_T32(F + T1); \
  490 + B = SPH_T32(T1 + T2); \
  491 + W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
  492 + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
  493 + + SPH_C32(0x106AA070) + W15); \
  494 + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
  495 + E = SPH_T32(E + T1); \
  496 + A = SPH_T32(T1 + T2); \
  497 + W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
  498 + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
  499 + + SPH_C32(0x19A4C116) + W00); \
  500 + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
  501 + D = SPH_T32(D + T1); \
  502 + H = SPH_T32(T1 + T2); \
  503 + W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
  504 + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
  505 + + SPH_C32(0x1E376C08) + W01); \
  506 + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
  507 + C = SPH_T32(C + T1); \
  508 + G = SPH_T32(T1 + T2); \
  509 + W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
  510 + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
  511 + + SPH_C32(0x2748774C) + W02); \
  512 + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
  513 + B = SPH_T32(B + T1); \
  514 + F = SPH_T32(T1 + T2); \
  515 + W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
  516 + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
  517 + + SPH_C32(0x34B0BCB5) + W03); \
  518 + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
  519 + A = SPH_T32(A + T1); \
  520 + E = SPH_T32(T1 + T2); \
  521 + W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
  522 + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
  523 + + SPH_C32(0x391C0CB3) + W04); \
  524 + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
  525 + H = SPH_T32(H + T1); \
  526 + D = SPH_T32(T1 + T2); \
  527 + W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
  528 + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
  529 + + SPH_C32(0x4ED8AA4A) + W05); \
  530 + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
  531 + G = SPH_T32(G + T1); \
  532 + C = SPH_T32(T1 + T2); \
  533 + W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
  534 + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
  535 + + SPH_C32(0x5B9CCA4F) + W06); \
  536 + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
  537 + F = SPH_T32(F + T1); \
  538 + B = SPH_T32(T1 + T2); \
  539 + W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
  540 + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
  541 + + SPH_C32(0x682E6FF3) + W07); \
  542 + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
  543 + E = SPH_T32(E + T1); \
  544 + A = SPH_T32(T1 + T2); \
  545 + W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
  546 + T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
  547 + + SPH_C32(0x748F82EE) + W08); \
  548 + T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
  549 + D = SPH_T32(D + T1); \
  550 + H = SPH_T32(T1 + T2); \
  551 + W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
  552 + T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
  553 + + SPH_C32(0x78A5636F) + W09); \
  554 + T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
  555 + C = SPH_T32(C + T1); \
  556 + G = SPH_T32(T1 + T2); \
  557 + W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
  558 + T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
  559 + + SPH_C32(0x84C87814) + W10); \
  560 + T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
  561 + B = SPH_T32(B + T1); \
  562 + F = SPH_T32(T1 + T2); \
  563 + W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
  564 + T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
  565 + + SPH_C32(0x8CC70208) + W11); \
  566 + T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
  567 + A = SPH_T32(A + T1); \
  568 + E = SPH_T32(T1 + T2); \
  569 + W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
  570 + T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
  571 + + SPH_C32(0x90BEFFFA) + W12); \
  572 + T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
  573 + H = SPH_T32(H + T1); \
  574 + D = SPH_T32(T1 + T2); \
  575 + W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
  576 + T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
  577 + + SPH_C32(0xA4506CEB) + W13); \
  578 + T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
  579 + G = SPH_T32(G + T1); \
  580 + C = SPH_T32(T1 + T2); \
  581 + W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
  582 + T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
  583 + + SPH_C32(0xBEF9A3F7) + W14); \
  584 + T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
  585 + F = SPH_T32(F + T1); \
  586 + B = SPH_T32(T1 + T2); \
  587 + W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
  588 + T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
  589 + + SPH_C32(0xC67178F2) + W15); \
  590 + T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
  591 + E = SPH_T32(E + T1); \
  592 + A = SPH_T32(T1 + T2); \
  593 + (r)[0] = SPH_T32((r)[0] + A); \
  594 + (r)[1] = SPH_T32((r)[1] + B); \
  595 + (r)[2] = SPH_T32((r)[2] + C); \
  596 + (r)[3] = SPH_T32((r)[3] + D); \
  597 + (r)[4] = SPH_T32((r)[4] + E); \
  598 + (r)[5] = SPH_T32((r)[5] + F); \
  599 + (r)[6] = SPH_T32((r)[6] + G); \
  600 + (r)[7] = SPH_T32((r)[7] + H); \
  601 + } while (0)
  602 +
  603 +#endif
  604 +
  605 +/*
  606 + * One round of SHA-224 / SHA-256. The data must be aligned for 32-bit access.
  607 + */
  608 +static void
  609 +sha2_round(const unsigned char *data, sph_u32 r[8])
  610 +{
  611 +#define SHA2_IN(x) sph_dec32be_aligned(data + (4 * (x)))
  612 + SHA2_ROUND_BODY(SHA2_IN, r);
  613 +#undef SHA2_IN
  614 +}
  615 +
  616 +/* see sph_sha2.h */
  617 +void
  618 +sph_sha224_init(void *cc)
  619 +{
  620 + sph_sha224_context *sc;
  621 +
  622 + sc = cc;
  623 + memcpy(sc->val, H224, sizeof H224);
  624 +#if SPH_64
  625 + sc->count = 0;
  626 +#else
  627 + sc->count_high = sc->count_low = 0;
  628 +#endif
  629 +}
  630 +
  631 +/* see sph_sha2.h */
  632 +void
  633 +sph_sha256_init(void *cc)
  634 +{
  635 + sph_sha256_context *sc;
  636 +
  637 + sc = cc;
  638 + memcpy(sc->val, H256, sizeof H256);
  639 +#if SPH_64
  640 + sc->count = 0;
  641 +#else
  642 + sc->count_high = sc->count_low = 0;
  643 +#endif
  644 +}
  645 +
  646 +#define RFUN sha2_round
  647 +#define HASH sha224
  648 +#define BE32 1
  649 +#include "sph/md_helper.c"
  650 +
  651 +/* see sph_sha2.h */
  652 +void
  653 +sph_sha224_close(void *cc, void *dst)
  654 +{
  655 + sha224_close(cc, dst, 7);
  656 + sph_sha224_init(cc);
  657 +}
  658 +
  659 +/* see sph_sha2.h */
  660 +void
  661 +sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
  662 +{
  663 + sha224_addbits_and_close(cc, ub, n, dst, 7);
  664 + sph_sha224_init(cc);
  665 +}
  666 +
  667 +/* see sph_sha2.h */
  668 +void
  669 +sph_sha256_close(void *cc, void *dst)
  670 +{
  671 + sha224_close(cc, dst, 8);
  672 + sph_sha256_init(cc);
  673 +}
  674 +
  675 +/* see sph_sha2.h */
  676 +void
  677 +sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
  678 +{
  679 + sha224_addbits_and_close(cc, ub, n, dst, 8);
  680 + sph_sha256_init(cc);
  681 +}
  682 +
  683 +/* see sph_sha2.h */
  684 +void
  685 +sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8])
  686 +{
  687 +#define SHA2_IN(x) msg[x]
  688 + SHA2_ROUND_BODY(SHA2_IN, val);
  689 +#undef SHA2_IN
  690 +}
... ...
libqpdf/sha2big.c 0 → 100644
  1 +/* $Id: sha2big.c 216 2010-06-08 09:46:57Z tp $ */
  2 +/*
  3 + * SHA-384 / SHA-512 implementation.
  4 + *
  5 + * ==========================(LICENSE BEGIN)============================
  6 + *
  7 + * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  8 + *
  9 + * Permission is hereby granted, free of charge, to any person obtaining
  10 + * a copy of this software and associated documentation files (the
  11 + * "Software"), to deal in the Software without restriction, including
  12 + * without limitation the rights to use, copy, modify, merge, publish,
  13 + * distribute, sublicense, and/or sell copies of the Software, and to
  14 + * permit persons to whom the Software is furnished to do so, subject to
  15 + * the following conditions:
  16 + *
  17 + * The above copyright notice and this permission notice shall be
  18 + * included in all copies or substantial portions of the Software.
  19 + *
  20 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  21 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  22 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  23 + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  24 + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  25 + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  26 + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  27 + *
  28 + * ===========================(LICENSE END)=============================
  29 + *
  30 + * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  31 + */
  32 +
#include <stddef.h>
#include <string.h>

#include "sph/sph_sha2.h"

/* The 64-bit SHA-384/512 implementation is compiled only when the
   platform provides a 64-bit type (SPH_64, from sph_types.h); the
   matching #endif is at the end of this file. */
#if SPH_64

/* FIPS 180-2 "Ch" and "Maj" bitwise functions, written so that each
   needs only three boolean operations. */
#define CH(X, Y, Z)    ((((Y) ^ (Z)) & (X)) ^ (Z))
#define MAJ(X, Y, Z)   (((X) & (Y)) | (((X) | (Y)) & (Z)))

#define ROTR64    SPH_ROTR64

/* Big-sigma (message schedule mixing of the state) and small-sigma
   (message expansion) functions for SHA-384/512, per FIPS 180-2. */
#define BSG5_0(x)      (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39))
#define BSG5_1(x)      (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41))
#define SSG5_0(x)      (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SPH_T64((x) >> 7))
#define SSG5_1(x)      (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SPH_T64((x) >> 6))
  49 +
/*
 * SHA-384/512 round constants K[0..79] (FIPS 180-2): the first 64 bits
 * of the fractional parts of the cube roots of the first 80 primes.
 * Must not be modified.
 */
static const sph_u64 K512[80] = {
    SPH_C64(0x428A2F98D728AE22), SPH_C64(0x7137449123EF65CD),
    SPH_C64(0xB5C0FBCFEC4D3B2F), SPH_C64(0xE9B5DBA58189DBBC),
    SPH_C64(0x3956C25BF348B538), SPH_C64(0x59F111F1B605D019),
    SPH_C64(0x923F82A4AF194F9B), SPH_C64(0xAB1C5ED5DA6D8118),
    SPH_C64(0xD807AA98A3030242), SPH_C64(0x12835B0145706FBE),
    SPH_C64(0x243185BE4EE4B28C), SPH_C64(0x550C7DC3D5FFB4E2),
    SPH_C64(0x72BE5D74F27B896F), SPH_C64(0x80DEB1FE3B1696B1),
    SPH_C64(0x9BDC06A725C71235), SPH_C64(0xC19BF174CF692694),
    SPH_C64(0xE49B69C19EF14AD2), SPH_C64(0xEFBE4786384F25E3),
    SPH_C64(0x0FC19DC68B8CD5B5), SPH_C64(0x240CA1CC77AC9C65),
    SPH_C64(0x2DE92C6F592B0275), SPH_C64(0x4A7484AA6EA6E483),
    SPH_C64(0x5CB0A9DCBD41FBD4), SPH_C64(0x76F988DA831153B5),
    SPH_C64(0x983E5152EE66DFAB), SPH_C64(0xA831C66D2DB43210),
    SPH_C64(0xB00327C898FB213F), SPH_C64(0xBF597FC7BEEF0EE4),
    SPH_C64(0xC6E00BF33DA88FC2), SPH_C64(0xD5A79147930AA725),
    SPH_C64(0x06CA6351E003826F), SPH_C64(0x142929670A0E6E70),
    SPH_C64(0x27B70A8546D22FFC), SPH_C64(0x2E1B21385C26C926),
    SPH_C64(0x4D2C6DFC5AC42AED), SPH_C64(0x53380D139D95B3DF),
    SPH_C64(0x650A73548BAF63DE), SPH_C64(0x766A0ABB3C77B2A8),
    SPH_C64(0x81C2C92E47EDAEE6), SPH_C64(0x92722C851482353B),
    SPH_C64(0xA2BFE8A14CF10364), SPH_C64(0xA81A664BBC423001),
    SPH_C64(0xC24B8B70D0F89791), SPH_C64(0xC76C51A30654BE30),
    SPH_C64(0xD192E819D6EF5218), SPH_C64(0xD69906245565A910),
    SPH_C64(0xF40E35855771202A), SPH_C64(0x106AA07032BBD1B8),
    SPH_C64(0x19A4C116B8D2D0C8), SPH_C64(0x1E376C085141AB53),
    SPH_C64(0x2748774CDF8EEB99), SPH_C64(0x34B0BCB5E19B48A8),
    SPH_C64(0x391C0CB3C5C95A63), SPH_C64(0x4ED8AA4AE3418ACB),
    SPH_C64(0x5B9CCA4F7763E373), SPH_C64(0x682E6FF3D6B2B8A3),
    SPH_C64(0x748F82EE5DEFB2FC), SPH_C64(0x78A5636F43172F60),
    SPH_C64(0x84C87814A1F0AB72), SPH_C64(0x8CC702081A6439EC),
    SPH_C64(0x90BEFFFA23631E28), SPH_C64(0xA4506CEBDE82BDE9),
    SPH_C64(0xBEF9A3F7B2C67915), SPH_C64(0xC67178F2E372532B),
    SPH_C64(0xCA273ECEEA26619C), SPH_C64(0xD186B8C721C0C207),
    SPH_C64(0xEADA7DD6CDE0EB1E), SPH_C64(0xF57D4F7FEE6ED178),
    SPH_C64(0x06F067AA72176FBA), SPH_C64(0x0A637DC5A2C898A6),
    SPH_C64(0x113F9804BEF90DAE), SPH_C64(0x1B710B35131C471B),
    SPH_C64(0x28DB77F523047D84), SPH_C64(0x32CAAB7B40C72493),
    SPH_C64(0x3C9EBE0A15C9BEBC), SPH_C64(0x431D67C49C100D4C),
    SPH_C64(0x4CC5D4BECB3E42B6), SPH_C64(0x597F299CFC657E2A),
    SPH_C64(0x5FCB6FAB3AD6FAEC), SPH_C64(0x6C44198C4A475817)
};
  92 +
/* SHA-384 initial chaining value (FIPS 180-2). This is the only
   difference, besides the output length, between SHA-384 and SHA-512. */
static const sph_u64 H384[8] = {
    SPH_C64(0xCBBB9D5DC1059ED8), SPH_C64(0x629A292A367CD507),
    SPH_C64(0x9159015A3070DD17), SPH_C64(0x152FECD8F70E5939),
    SPH_C64(0x67332667FFC00B31), SPH_C64(0x8EB44A8768581511),
    SPH_C64(0xDB0C2E0D64F98FA7), SPH_C64(0x47B5481DBEFA4FA4)
};

/* SHA-512 initial chaining value (FIPS 180-2). */
static const sph_u64 H512[8] = {
    SPH_C64(0x6A09E667F3BCC908), SPH_C64(0xBB67AE8584CAA73B),
    SPH_C64(0x3C6EF372FE94F82B), SPH_C64(0xA54FF53A5F1D36F1),
    SPH_C64(0x510E527FADE682D1), SPH_C64(0x9B05688C2B3E6C1F),
    SPH_C64(0x1F83D9ABFB41BD6B), SPH_C64(0x5BE0CD19137E2179)
};
  106 +
/*
 * This macro defines the body for a SHA-384 / SHA-512 compression function
 * implementation. The "in" parameter should evaluate, when applied to a
 * numerical input parameter from 0 to 15, to an expression which yields
 * the corresponding input block. The "r" parameter should evaluate to
 * an array or pointer expression designating the array of 8 words which
 * contains the input and output of the compression function.
 *
 * SHA-512 is hard for the compiler. If the loop is completely unrolled,
 * then the code will be quite huge (possibly more than 100 kB), and the
 * performance will be degraded due to cache misses on the code. We
 * unroll only eight steps, which avoids all needless copies when
 * 64-bit registers are swapped.
 */

/* One SHA-512 step (FIPS 180-2). Instead of physically rotating the
   eight state words, callers pass them in a rotated argument order. */
#define SHA3_STEP(A, B, C, D, E, F, G, H, i)   do { \
        sph_u64 T1, T2; \
        T1 = SPH_T64(H + BSG5_1(E) + CH(E, F, G) + K512[i] + W[i]); \
        T2 = SPH_T64(BSG5_0(A) + MAJ(A, B, C)); \
        D = SPH_T64(D + T1); \
        H = SPH_T64(T1 + T2); \
    } while (0)

/* Full compression function: expand the 16 input words to the 80-word
   schedule W[], run 80 steps (unrolled 8 at a time so the register
   roles rotate through the argument lists), then add the result back
   into the chaining value r[]. */
#define SHA3_ROUND_BODY(in, r)   do { \
        int i; \
        sph_u64 A, B, C, D, E, F, G, H; \
        sph_u64 W[80]; \
 \
        for (i = 0; i < 16; i ++) \
            W[i] = in(i); \
        for (i = 16; i < 80; i ++) \
            W[i] = SPH_T64(SSG5_1(W[i - 2]) + W[i - 7] \
                + SSG5_0(W[i - 15]) + W[i - 16]); \
        A = (r)[0]; \
        B = (r)[1]; \
        C = (r)[2]; \
        D = (r)[3]; \
        E = (r)[4]; \
        F = (r)[5]; \
        G = (r)[6]; \
        H = (r)[7]; \
        for (i = 0; i < 80; i += 8) { \
            SHA3_STEP(A, B, C, D, E, F, G, H, i + 0); \
            SHA3_STEP(H, A, B, C, D, E, F, G, i + 1); \
            SHA3_STEP(G, H, A, B, C, D, E, F, i + 2); \
            SHA3_STEP(F, G, H, A, B, C, D, E, i + 3); \
            SHA3_STEP(E, F, G, H, A, B, C, D, i + 4); \
            SHA3_STEP(D, E, F, G, H, A, B, C, i + 5); \
            SHA3_STEP(C, D, E, F, G, H, A, B, i + 6); \
            SHA3_STEP(B, C, D, E, F, G, H, A, i + 7); \
        } \
        (r)[0] = SPH_T64((r)[0] + A); \
        (r)[1] = SPH_T64((r)[1] + B); \
        (r)[2] = SPH_T64((r)[2] + C); \
        (r)[3] = SPH_T64((r)[3] + D); \
        (r)[4] = SPH_T64((r)[4] + E); \
        (r)[5] = SPH_T64((r)[5] + F); \
        (r)[6] = SPH_T64((r)[6] + G); \
        (r)[7] = SPH_T64((r)[7] + H); \
    } while (0)
  167 +
/*
 * One round of SHA-384 / SHA-512. The data must be aligned for 64-bit access.
 * (The "sha3" prefix is sphlib's historic name for the 64-bit SHA-2
 * family; it is unrelated to Keccak/SHA-3.)
 */
static void
sha3_round(const unsigned char *data, sph_u64 r[8])
{
/* Decode each big-endian 64-bit word straight from the input block. */
#define SHA3_IN(x) sph_dec64be_aligned(data + (8 * (x)))
    SHA3_ROUND_BODY(SHA3_IN, r);
#undef SHA3_IN
}
  178 +
  179 +/* see sph_sha3.h */
  180 +void
  181 +sph_sha384_init(void *cc)
  182 +{
  183 + sph_sha384_context *sc;
  184 +
  185 + sc = cc;
  186 + memcpy(sc->val, H384, sizeof H384);
  187 + sc->count = 0;
  188 +}
  189 +
  190 +/* see sph_sha3.h */
  191 +void
  192 +sph_sha512_init(void *cc)
  193 +{
  194 + sph_sha512_context *sc;
  195 +
  196 + sc = cc;
  197 + memcpy(sc->val, H512, sizeof H512);
  198 + sc->count = 0;
  199 +}
  200 +
/*
 * Instantiate the generic Merkle-Damgard helper for the 64-bit,
 * big-endian SHA family (128-byte blocks): this generates the
 * sph_sha384() data-entry function plus the static sha384_close()
 * and sha384_addbits_and_close() used below (shared by SHA-512,
 * whose context layout is identical).
 */
#define RFUN   sha3_round
#define HASH   sha384
#define BE64   1
#include "sph/md_helper.c"
  205 +
/* see sph_sha2.h */
void
sph_sha384_close(void *cc, void *dst)
{
    /* 6 output words of 64 bits -> 384-bit digest. */
    sha384_close(cc, dst, 6);
    sph_sha384_init(cc);   /* context is reusable after close */
}

/* see sph_sha2.h */
void
sph_sha384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    sha384_addbits_and_close(cc, ub, n, dst, 6);
    sph_sha384_init(cc);
}

/* see sph_sha2.h */
void
sph_sha512_close(void *cc, void *dst)
{
    /* SHA-512 shares the SHA-384 context layout and padding code;
       only the IV (set at init) and the output size (8 words) differ. */
    sha384_close(cc, dst, 8);
    sph_sha512_init(cc);
}

/* see sph_sha2.h */
void
sph_sha512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
    sha384_addbits_and_close(cc, ub, n, dst, 8);
    sph_sha512_init(cc);
}

/* see sph_sha2.h */
void
sph_sha384_comp(const sph_u64 msg[16], sph_u64 val[8])
{
/* Feed the compression function from an already-decoded message block. */
#define SHA3_IN(x) msg[x]
    SHA3_ROUND_BODY(SHA3_IN, val);
#undef SHA3_IN
}

#endif /* SPH_64 */
... ...
libqpdf/sph/md_helper.c 0 → 100644
  1 +/* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */
  2 +/*
  3 + * This file contains some functions which implement the external data
  4 + * handling and padding for Merkle-Damgard hash functions which follow
  5 + * the conventions set out by MD4 (little-endian) or SHA-1 (big-endian).
  6 + *
  7 + * API: this file is meant to be included, not compiled as a stand-alone
  8 + * file. Some macros must be defined:
  9 + * RFUN name for the round function
  10 + * HASH "short name" for the hash function
  11 + * BE32 defined for big-endian, 32-bit based (e.g. SHA-1)
  12 + * LE32 defined for little-endian, 32-bit based (e.g. MD5)
  13 + * BE64 defined for big-endian, 64-bit based (e.g. SHA-512)
  14 + * LE64 defined for little-endian, 64-bit based (no example yet)
  15 + * PW01 if defined, append 0x01 instead of 0x80 (for Tiger)
  16 + * BLEN if defined, length of a message block (in bytes)
  17 + * PLW1 if defined, length is defined on one 64-bit word only (for Tiger)
  18 + * PLW4 if defined, length is defined on four 64-bit words (for WHIRLPOOL)
  19 + * SVAL if defined, reference to the context state information
  20 + *
  21 + * BLEN is used when a message block is not 16 (32-bit or 64-bit) words:
  22 + * this is used for instance for Tiger, which works on 64-bit words but
  23 + * uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are
  24 + * ignored if 32-bit words are used; if 64-bit words are used and PLW1 is
  25 + * set, then only one word (64 bits) will be used to encode the input
  26 + * message length (in bits), otherwise two words will be used (as in
  27 + * SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but
  28 + * not PLW1), four 64-bit words will be used to encode the message length
  29 + * (in bits). Note that regardless of those settings, only 64-bit message
  30 + * lengths are supported (in bits): messages longer than 2 Exabytes will be
  31 + * improperly hashed (this is unlikely to happen soon: 2 Exabytes is about
  32 + * 2 millions Terabytes, which is huge).
  33 + *
  34 + * If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close()
  35 + * function. This is used for Tiger2, which is identical to Tiger except
  36 + * when it comes to the padding (Tiger2 uses the standard 0x80 byte instead
  37 + * of the 0x01 from original Tiger).
  38 + *
  39 + * The RFUN function is invoked with two arguments, the first pointing to
  40 + * aligned data (as a "const void *"), the second being state information
  41 + * from the context structure. By default, this state information is the
  42 + * "val" field from the context, and this field is assumed to be an array
  43 + * of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64).
  44 + * from the context structure. The "val" field can have any type, except
  45 + * for the output encoding which assumes that it is an array of "sph_u32"
  46 + * values. By defining NO_OUTPUT, this last step is deactivated; the
  47 + * includer code is then responsible for writing out the hash result. When
  48 + * NO_OUTPUT is defined, the third parameter to the "close()" function is
  49 + * ignored.
  50 + *
  51 + * ==========================(LICENSE BEGIN)============================
  52 + *
  53 + * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  54 + *
  55 + * Permission is hereby granted, free of charge, to any person obtaining
  56 + * a copy of this software and associated documentation files (the
  57 + * "Software"), to deal in the Software without restriction, including
  58 + * without limitation the rights to use, copy, modify, merge, publish,
  59 + * distribute, sublicense, and/or sell copies of the Software, and to
  60 + * permit persons to whom the Software is furnished to do so, subject to
  61 + * the following conditions:
  62 + *
  63 + * The above copyright notice and this permission notice shall be
  64 + * included in all copies or substantial portions of the Software.
  65 + *
  66 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  67 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  68 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  69 + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  70 + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  71 + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  72 + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  73 + *
  74 + * ===========================(LICENSE END)=============================
  75 + *
  76 + * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  77 + */
  78 +
#ifdef _MSC_VER
/* C4146: unary minus applied to unsigned type. Intentional in the
   padding code below ((ub & -z)). */
#pragma warning (disable: 4146)
#endif

/* Token-pasting helper: SPH_XCAT(a, b) expands a and b, then glues
   them, so SPH_XCAT(sph_, HASH) builds the real function names. */
#undef SPH_XCAT
#define SPH_XCAT(a, b)    SPH_XCAT_(a, b)
#undef SPH_XCAT_
#define SPH_XCAT_(a, b)   a ## b

/* Block length and word length (bytes), derived from the word-size /
   endianness selection made by the including file. */
#undef SPH_BLEN
#undef SPH_WLEN
#if defined BE64 || defined LE64
#define SPH_BLEN    128U
#define SPH_WLEN      8U
#else
#define SPH_BLEN     64U
#define SPH_WLEN      4U
#endif

/* The includer may override the block length (e.g. Tiger). */
#ifdef BLEN
#undef SPH_BLEN
#define SPH_BLEN    BLEN
#endif

/* Maximum number of data bytes in the final padded block: the rest is
   reserved for the encoded message length (1, 2 or 4 words). */
#undef SPH_MAXPAD
#if defined PLW1
#define SPH_MAXPAD   (SPH_BLEN - SPH_WLEN)
#elif defined PLW4
#define SPH_MAXPAD   (SPH_BLEN - (SPH_WLEN << 2))
#else
#define SPH_MAXPAD   (SPH_BLEN - (SPH_WLEN << 1))
#endif

/* State expression handed to RFUN; SVAL lets the includer supply its
   own state and suppress the default output encoding. */
#undef SPH_VAL
#undef SPH_NO_OUTPUT
#ifdef SVAL
#define SPH_VAL         SVAL
#define SPH_NO_OUTPUT   1
#else
#define SPH_VAL         sc->val
#endif
  120 +
#ifndef CLOSE_ONLY

/*
 * Buffered data entry. Bytes are accumulated in the context buffer and
 * the round function is applied each time a full block is available.
 * When SPH_UPTR is defined (pointers fit in an integer type), this is
 * the slow path, used only for short or misaligned input; otherwise it
 * is the public entry point itself.
 */
#ifdef SPH_UPTR
static void
SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len)
#else
void
SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
#endif
{
    SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
    unsigned current;

    sc = cc;
    /* Offset of the first free byte in the block buffer. */
#if SPH_64
    current = (unsigned)sc->count & (SPH_BLEN - 1U);
#else
    current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
#endif
    while (len > 0) {
        unsigned clen;
#if !SPH_64
        sph_u32 clow, clow2;
#endif

        clen = SPH_BLEN - current;
        if (clen > len)
            clen = len;
        memcpy(sc->buf + current, data, clen);
        data = (const unsigned char *)data + clen;
        current += clen;
        len -= clen;
        if (current == SPH_BLEN) {
            RFUN(sc->buf, SPH_VAL);
            current = 0;
        }
        /* Count in bytes; with two 32-bit words, detect carry manually. */
#if SPH_64
        sc->count += clen;
#else
        clow = sc->count_low;
        clow2 = SPH_T32(clow + clen);
        sc->count_low = clow2;
        if (clow2 < clow)
            sc->count_high ++;
#endif
    }
}

#ifdef SPH_UPTR
/*
 * Fast-path data entry: align on a block boundary via the short path,
 * then feed full blocks to RFUN directly from the caller's buffer
 * (no memcpy), provided the data is suitably aligned.
 */
void
SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
{
    SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
    unsigned current;
    size_t orig_len;
#if !SPH_64
    sph_u32 clow, clow2;
#endif

    /* Not worth the alignment bookkeeping for tiny inputs. */
    if (len < (2 * SPH_BLEN)) {
        SPH_XCAT(HASH, _short)(cc, data, len);
        return;
    }
    sc = cc;
#if SPH_64
    current = (unsigned)sc->count & (SPH_BLEN - 1U);
#else
    current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
#endif
    /* Complete any partially filled block first. */
    if (current > 0) {
        unsigned t;

        t = SPH_BLEN - current;
        SPH_XCAT(HASH, _short)(cc, data, t);
        data = (const unsigned char *)data + t;
        len -= t;
    }
    /* On strict-alignment platforms, fall back if the remaining data
       is not word-aligned (RFUN reads aligned words). */
#if !SPH_UNALIGNED
    if (((SPH_UPTR)data & (SPH_WLEN - 1U)) != 0) {
        SPH_XCAT(HASH, _short)(cc, data, len);
        return;
    }
#endif
    orig_len = len;
    while (len >= SPH_BLEN) {
        RFUN(data, SPH_VAL);
        len -= SPH_BLEN;
        data = (const unsigned char *)data + SPH_BLEN;
    }
    /* Keep the trailing partial block for later. */
    if (len > 0)
        memcpy(sc->buf, data, len);
#if SPH_64
    sc->count += (sph_u64)orig_len;
#else
    clow = sc->count_low;
    clow2 = SPH_T32(clow + orig_len);
    sc->count_low = clow2;
    if (clow2 < clow)
        sc->count_high ++;
    /*
     * This code handles the improbable situation where "size_t" is
     * greater than 32 bits, and yet we do not have a 64-bit type.
     */
    orig_len >>= 12;
    orig_len >>= 10;
    orig_len >>= 10;
    sc->count_high += orig_len;
#endif
}
#endif

#endif
  233 +
  234 +/*
  235 + * Perform padding and produce result. The context is NOT reinitialized
  236 + * by this function.
  237 + */
  238 +static void
  239 +SPH_XCAT(HASH, _addbits_and_close)(void *cc,
  240 + unsigned ub, unsigned n, void *dst, unsigned rnum)
  241 +{
  242 + SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
  243 + unsigned current, u;
  244 +#if !SPH_64
  245 + sph_u32 low, high;
  246 +#endif
  247 +
  248 + sc = cc;
  249 +#if SPH_64
  250 + current = (unsigned)sc->count & (SPH_BLEN - 1U);
  251 +#else
  252 + current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
  253 +#endif
  254 +#ifdef PW01
  255 + sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n);
  256 +#else
  257 + {
  258 + unsigned z;
  259 +
  260 + z = 0x80 >> n;
  261 + sc->buf[current ++] = ((ub & -z) | z) & 0xFF;
  262 + }
  263 +#endif
  264 + if (current > SPH_MAXPAD) {
  265 + memset(sc->buf + current, 0, SPH_BLEN - current);
  266 + RFUN(sc->buf, SPH_VAL);
  267 + memset(sc->buf, 0, SPH_MAXPAD);
  268 + } else {
  269 + memset(sc->buf + current, 0, SPH_MAXPAD - current);
  270 + }
  271 +#if defined BE64
  272 +#if defined PLW1
  273 + sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
  274 + SPH_T64(sc->count << 3) + (sph_u64)n);
  275 +#elif defined PLW4
  276 + memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN);
  277 + sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN,
  278 + sc->count >> 61);
  279 + sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN,
  280 + SPH_T64(sc->count << 3) + (sph_u64)n);
  281 +#else
  282 + sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61);
  283 + sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN,
  284 + SPH_T64(sc->count << 3) + (sph_u64)n);
  285 +#endif
  286 +#elif defined LE64
  287 +#if defined PLW1
  288 + sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
  289 + SPH_T64(sc->count << 3) + (sph_u64)n);
  290 +#elif defined PLW1
  291 + sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
  292 + SPH_T64(sc->count << 3) + (sph_u64)n);
  293 + sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
  294 + memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN);
  295 +#else
  296 + sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
  297 + SPH_T64(sc->count << 3) + (sph_u64)n);
  298 + sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
  299 +#endif
  300 +#else
  301 +#if SPH_64
  302 +#ifdef BE32
  303 + sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
  304 + SPH_T64(sc->count << 3) + (sph_u64)n);
  305 +#else
  306 + sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
  307 + SPH_T64(sc->count << 3) + (sph_u64)n);
  308 +#endif
  309 +#else
  310 + low = sc->count_low;
  311 + high = SPH_T32((sc->count_high << 3) | (low >> 29));
  312 + low = SPH_T32(low << 3) + (sph_u32)n;
  313 +#ifdef BE32
  314 + sph_enc32be(sc->buf + SPH_MAXPAD, high);
  315 + sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low);
  316 +#else
  317 + sph_enc32le(sc->buf + SPH_MAXPAD, low);
  318 + sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high);
  319 +#endif
  320 +#endif
  321 +#endif
  322 + RFUN(sc->buf, SPH_VAL);
  323 +#ifdef SPH_NO_OUTPUT
  324 + (void)dst;
  325 + (void)rnum;
  326 + (void)u;
  327 +#else
  328 + for (u = 0; u < rnum; u ++) {
  329 +#if defined BE64
  330 + sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]);
  331 +#elif defined LE64
  332 + sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]);
  333 +#elif defined BE32
  334 + sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]);
  335 +#else
  336 + sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]);
  337 +#endif
  338 + }
  339 +#endif
  340 +}
  341 +
/*
 * Standard close(): pad with no extra bits and write the first "rnum"
 * output words to dst.
 */
static void
SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum)
{
    SPH_XCAT(HASH, _addbits_and_close)(cc, 0, 0, dst, rnum);
}
... ...
libqpdf/sph/sph_sha2.h 0 → 100644
  1 +/* $Id: sph_sha2.h 216 2010-06-08 09:46:57Z tp $ */
  2 +/**
  3 + * SHA-224, SHA-256, SHA-384 and SHA-512 interface.
  4 + *
  5 + * SHA-256 has been published in FIPS 180-2, now amended with a change
  6 + * notice to include SHA-224 as well (which is a simple variation on
  7 + * SHA-256). SHA-384 and SHA-512 are also defined in FIPS 180-2. FIPS
  8 + * standards can be found at:
  9 + * http://csrc.nist.gov/publications/fips/
  10 + *
  11 + * ==========================(LICENSE BEGIN)============================
  12 + *
  13 + * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  14 + *
  15 + * Permission is hereby granted, free of charge, to any person obtaining
  16 + * a copy of this software and associated documentation files (the
  17 + * "Software"), to deal in the Software without restriction, including
  18 + * without limitation the rights to use, copy, modify, merge, publish,
  19 + * distribute, sublicense, and/or sell copies of the Software, and to
  20 + * permit persons to whom the Software is furnished to do so, subject to
  21 + * the following conditions:
  22 + *
  23 + * The above copyright notice and this permission notice shall be
  24 + * included in all copies or substantial portions of the Software.
  25 + *
  26 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  27 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  28 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  29 + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  30 + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  31 + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  32 + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  33 + *
  34 + * ===========================(LICENSE END)=============================
  35 + *
  36 + * @file sph_sha2.h
  37 + * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  38 + */
  39 +
  40 +#ifndef SPH_SHA2_H__
  41 +#define SPH_SHA2_H__
  42 +
  43 +#include <stddef.h>
  44 +#include "sph_types.h"
  45 +
  46 +/**
  47 + * Output size (in bits) for SHA-224.
  48 + */
  49 +#define SPH_SIZE_sha224 224
  50 +
  51 +/**
  52 + * Output size (in bits) for SHA-256.
  53 + */
  54 +#define SPH_SIZE_sha256 256
  55 +
  56 +/**
  57 + * This structure is a context for SHA-224 computations: it contains the
  58 + * intermediate values and some data from the last entered block. Once
  59 + * a SHA-224 computation has been performed, the context can be reused for
  60 + * another computation.
  61 + *
  62 + * The contents of this structure are private. A running SHA-224 computation
  63 + * can be cloned by copying the context (e.g. with a simple
  64 + * <code>memcpy()</code>).
  65 + */
typedef struct {
#ifndef DOXYGEN_IGNORE
    unsigned char buf[64];    /* first field, for alignment */
    sph_u32 val[8];           /* current chaining value */
#if SPH_64
    sph_u64 count;            /* number of input bytes processed so far */
#else
    sph_u32 count_high, count_low;   /* 64-bit byte count in two words */
#endif
#endif
} sph_sha224_context;
  77 +
  78 +/**
  79 + * This structure is a context for SHA-256 computations. It is identical
  80 + * to the SHA-224 context. However, a context is initialized for SHA-224
  81 + * <strong>or</strong> SHA-256, but not both (the internal IV is not the
  82 + * same).
  83 + */
  84 +typedef sph_sha224_context sph_sha256_context;
  85 +
  86 +/**
  87 + * Initialize a SHA-224 context. This process performs no memory allocation.
  88 + *
  89 + * @param cc the SHA-224 context (pointer to
  90 + * a <code>sph_sha224_context</code>)
  91 + */
  92 +void sph_sha224_init(void *cc);
  93 +
  94 +/**
  95 + * Process some data bytes. It is acceptable that <code>len</code> is zero
  96 + * (in which case this function does nothing).
  97 + *
  98 + * @param cc the SHA-224 context
  99 + * @param data the input data
  100 + * @param len the input data length (in bytes)
  101 + */
  102 +void sph_sha224(void *cc, const void *data, size_t len);
  103 +
  104 +/**
  105 + * Terminate the current SHA-224 computation and output the result into the
  106 + * provided buffer. The destination buffer must be wide enough to
 * accommodate the result (28 bytes). The context is automatically
  108 + * reinitialized.
  109 + *
  110 + * @param cc the SHA-224 context
  111 + * @param dst the destination buffer
  112 + */
  113 +void sph_sha224_close(void *cc, void *dst);
  114 +
  115 +/**
  116 + * Add a few additional bits (0 to 7) to the current computation, then
  117 + * terminate it and output the result in the provided buffer, which must
 * be wide enough to accommodate the result (28 bytes). If bit number i
 * in <code>ub</code> has value 2^i, then the extra bits are those
 * numbered 7 down to 8-n (this is the big-endian convention at the byte
  121 + * level). The context is automatically reinitialized.
  122 + *
  123 + * @param cc the SHA-224 context
  124 + * @param ub the extra bits
  125 + * @param n the number of extra bits (0 to 7)
  126 + * @param dst the destination buffer
  127 + */
  128 +void sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
  129 +
  130 +/**
  131 + * Apply the SHA-224 compression function on the provided data. The
  132 + * <code>msg</code> parameter contains the 16 32-bit input blocks,
  133 + * as numerical values (hence after the big-endian decoding). The
  134 + * <code>val</code> parameter contains the 8 32-bit input blocks for
  135 + * the compression function; the output is written in place in this
  136 + * array.
  137 + *
  138 + * @param msg the message block (16 values)
  139 + * @param val the function 256-bit input and output
  140 + */
  141 +void sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8]);
  142 +
  143 +/**
  144 + * Initialize a SHA-256 context. This process performs no memory allocation.
  145 + *
  146 + * @param cc the SHA-256 context (pointer to
  147 + * a <code>sph_sha256_context</code>)
  148 + */
  149 +void sph_sha256_init(void *cc);
  150 +
  151 +#ifdef DOXYGEN_IGNORE
  152 +/**
  153 + * Process some data bytes, for SHA-256. This function is identical to
 * <code>sph_sha224()</code>.
  155 + *
  156 + * @param cc the SHA-224 context
  157 + * @param data the input data
  158 + * @param len the input data length (in bytes)
  159 + */
  160 +void sph_sha256(void *cc, const void *data, size_t len);
  161 +#endif
  162 +
  163 +#ifndef DOXYGEN_IGNORE
  164 +#define sph_sha256 sph_sha224
  165 +#endif
  166 +
  167 +/**
  168 + * Terminate the current SHA-256 computation and output the result into the
  169 + * provided buffer. The destination buffer must be wide enough to
 * accommodate the result (32 bytes). The context is automatically
  171 + * reinitialized.
  172 + *
  173 + * @param cc the SHA-256 context
  174 + * @param dst the destination buffer
  175 + */
  176 +void sph_sha256_close(void *cc, void *dst);
  177 +
  178 +/**
  179 + * Add a few additional bits (0 to 7) to the current computation, then
  180 + * terminate it and output the result in the provided buffer, which must
 * be wide enough to accommodate the result (32 bytes). If bit number i
 * in <code>ub</code> has value 2^i, then the extra bits are those
 * numbered 7 down to 8-n (this is the big-endian convention at the byte
  184 + * level). The context is automatically reinitialized.
  185 + *
  186 + * @param cc the SHA-256 context
  187 + * @param ub the extra bits
  188 + * @param n the number of extra bits (0 to 7)
  189 + * @param dst the destination buffer
  190 + */
  191 +void sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
  192 +
  193 +#ifdef DOXYGEN_IGNORE
  194 +/**
  195 + * Apply the SHA-256 compression function on the provided data. This
  196 + * function is identical to <code>sha224_comp()</code>.
  197 + *
  198 + * @param msg the message block (16 values)
  199 + * @param val the function 256-bit input and output
  200 + */
  201 +void sph_sha256_comp(const sph_u32 msg[16], sph_u32 val[8]);
  202 +#endif
  203 +
  204 +#ifndef DOXYGEN_IGNORE
  205 +#define sph_sha256_comp sph_sha224_comp
  206 +#endif
  207 +
  208 +#if SPH_64
  209 +
  210 +/**
  211 + * Output size (in bits) for SHA-384.
  212 + */
  213 +#define SPH_SIZE_sha384 384
  214 +
  215 +/**
  216 + * Output size (in bits) for SHA-512.
  217 + */
  218 +#define SPH_SIZE_sha512 512
  219 +
  220 +/**
  221 + * This structure is a context for SHA-384 computations: it contains the
  222 + * intermediate values and some data from the last entered block. Once
  223 + * a SHA-384 computation has been performed, the context can be reused for
  224 + * another computation.
  225 + *
  226 + * The contents of this structure are private. A running SHA-384 computation
  227 + * can be cloned by copying the context (e.g. with a simple
  228 + * <code>memcpy()</code>).
  229 + */
typedef struct {
#ifndef DOXYGEN_IGNORE
    unsigned char buf[128];   /* first field, for alignment */
    sph_u64 val[8];           /* current chaining value */
    sph_u64 count;            /* number of input bytes processed so far */
#endif
} sph_sha384_context;
  237 +
  238 +/**
  239 + * Initialize a SHA-384 context. This process performs no memory allocation.
  240 + *
  241 + * @param cc the SHA-384 context (pointer to
  242 + * a <code>sph_sha384_context</code>)
  243 + */
  244 +void sph_sha384_init(void *cc);
  245 +
  246 +/**
  247 + * Process some data bytes. It is acceptable that <code>len</code> is zero
  248 + * (in which case this function does nothing).
  249 + *
  250 + * @param cc the SHA-384 context
  251 + * @param data the input data
  252 + * @param len the input data length (in bytes)
  253 + */
  254 +void sph_sha384(void *cc, const void *data, size_t len);
  255 +
  256 +/**
  257 + * Terminate the current SHA-384 computation and output the result into the
  258 + * provided buffer. The destination buffer must be wide enough to
 * accommodate the result (48 bytes). The context is automatically
  260 + * reinitialized.
  261 + *
  262 + * @param cc the SHA-384 context
  263 + * @param dst the destination buffer
  264 + */
  265 +void sph_sha384_close(void *cc, void *dst);
  266 +
  267 +/**
  268 + * Add a few additional bits (0 to 7) to the current computation, then
  269 + * terminate it and output the result in the provided buffer, which must
 * be wide enough to accommodate the result (48 bytes). If bit number i
 * in <code>ub</code> has value 2^i, then the extra bits are those
 * numbered 7 down to 8-n (this is the big-endian convention at the byte
  273 + * level). The context is automatically reinitialized.
  274 + *
  275 + * @param cc the SHA-384 context
  276 + * @param ub the extra bits
  277 + * @param n the number of extra bits (0 to 7)
  278 + * @param dst the destination buffer
  279 + */
  280 +void sph_sha384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
  281 +
  282 +/**
  283 + * Apply the SHA-384 compression function on the provided data. The
  284 + * <code>msg</code> parameter contains the 16 64-bit input blocks,
  285 + * as numerical values (hence after the big-endian decoding). The
  286 + * <code>val</code> parameter contains the 8 64-bit input blocks for
  287 + * the compression function; the output is written in place in this
  288 + * array.
  289 + *
  290 + * @param msg the message block (16 values)
  291 + * @param val the function 512-bit input and output
  292 + */
  293 +void sph_sha384_comp(const sph_u64 msg[16], sph_u64 val[8]);
  294 +
  295 +/**
  296 + * This structure is a context for SHA-512 computations. It is identical
  297 + * to the SHA-384 context. However, a context is initialized for SHA-384
  298 + * <strong>or</strong> SHA-512, but not both (the internal IV is not the
  299 + * same).
  300 + */
  301 +typedef sph_sha384_context sph_sha512_context;
  302 +
  303 +/**
  304 + * Initialize a SHA-512 context. This process performs no memory allocation.
  305 + *
  306 + * @param cc the SHA-512 context (pointer to
  307 + * a <code>sph_sha512_context</code>)
  308 + */
  309 +void sph_sha512_init(void *cc);
  310 +
  311 +#ifdef DOXYGEN_IGNORE
  312 +/**
  313 + * Process some data bytes, for SHA-512. This function is identical to
  314 + * <code>sph_sha384()</code>.
  315 + *
  316 + * @param cc the SHA-384 context
  317 + * @param data the input data
  318 + * @param len the input data length (in bytes)
  319 + */
  320 +void sph_sha512(void *cc, const void *data, size_t len);
  321 +#endif
  322 +
  323 +#ifndef DOXYGEN_IGNORE
  324 +#define sph_sha512 sph_sha384
  325 +#endif
  326 +
  327 +/**
  328 + * Terminate the current SHA-512 computation and output the result into the
  329 + * provided buffer. The destination buffer must be wide enough to
  330 + * accommodate the result (64 bytes). The context is automatically
  331 + * reinitialized.
  332 + *
  333 + * @param cc the SHA-512 context
  334 + * @param dst the destination buffer
  335 + */
  336 +void sph_sha512_close(void *cc, void *dst);
  337 +
  338 +/**
  339 + * Add a few additional bits (0 to 7) to the current computation, then
  340 + * terminate it and output the result in the provided buffer, which must
  341 + * be wide enough to accommodate the result (64 bytes). If bit number i
  342 + * in <code>ub</code> has value 2^i, then the extra bits are those
  343 + * numbered 7 downto 8-n (this is the big-endian convention at the byte
  344 + * level). The context is automatically reinitialized.
  345 + *
  346 + * @param cc the SHA-512 context
  347 + * @param ub the extra bits
  348 + * @param n the number of extra bits (0 to 7)
  349 + * @param dst the destination buffer
  350 + */
  351 +void sph_sha512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
  352 +
  353 +#ifdef DOXYGEN_IGNORE
  354 +/**
  355 + * Apply the SHA-512 compression function. This function is identical to
  356 + * <code>sph_sha384_comp()</code>.
  357 + *
  358 + * @param msg the message block (16 values)
  359 + * @param val the function 512-bit input and output
  360 + */
  361 +void sph_sha512_comp(const sph_u64 msg[16], sph_u64 val[8]);
  362 +#endif
  363 +
  364 +#ifndef DOXYGEN_IGNORE
  365 +#define sph_sha512_comp sph_sha384_comp
  366 +#endif
  367 +
  368 +#endif
  369 +
  370 +#endif
... ...
libqpdf/sph/sph_types.h 0 → 100644
  1 +/* $Id: sph_types.h 260 2011-07-21 01:02:38Z tp $ */
  2 +/**
  3 + * Basic type definitions.
  4 + *
  5 + * This header file defines the generic integer types that will be used
  6 + * for the implementation of hash functions; it also contains helper
  7 + * functions which encode and decode multi-byte integer values, using
  8 + * either little-endian or big-endian conventions.
  9 + *
  10 + * This file contains a compile-time test on the size of a byte
  11 + * (the <code>unsigned char</code> C type). If bytes are not octets,
  12 + * i.e. if they do not have a size of exactly 8 bits, then compilation
  13 + * is aborted. Architectures where bytes are not octets are relatively
  14 + * rare, even in the embedded devices market. We forbid non-octet bytes
  15 + * because there is no clear convention on how octet streams are encoded
  16 + * on such systems.
  17 + *
  18 + * ==========================(LICENSE BEGIN)============================
  19 + *
  20 + * Copyright (c) 2007-2010 Projet RNRT SAPHIR
  21 + *
  22 + * Permission is hereby granted, free of charge, to any person obtaining
  23 + * a copy of this software and associated documentation files (the
  24 + * "Software"), to deal in the Software without restriction, including
  25 + * without limitation the rights to use, copy, modify, merge, publish,
  26 + * distribute, sublicense, and/or sell copies of the Software, and to
  27 + * permit persons to whom the Software is furnished to do so, subject to
  28 + * the following conditions:
  29 + *
  30 + * The above copyright notice and this permission notice shall be
  31 + * included in all copies or substantial portions of the Software.
  32 + *
  33 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  34 + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  35 + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  36 + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  37 + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  38 + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  39 + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  40 + *
  41 + * ===========================(LICENSE END)=============================
  42 + *
  43 + * @file sph_types.h
  44 + * @author Thomas Pornin <thomas.pornin@cryptolog.com>
  45 + */
  46 +
  47 +#ifndef SPH_TYPES_H__
  48 +#define SPH_TYPES_H__
  49 +
  50 +#include <limits.h>
  51 +
  52 +/*
  53 + * All our I/O functions are defined over octet streams. We do not know
  54 + * how to handle input data if bytes are not octets.
  55 + */
  56 +#if CHAR_BIT != 8
  57 +#error This code requires 8-bit bytes
  58 +#endif
  59 +
  60 +/* ============= BEGIN documentation block for Doxygen ============ */
  61 +
  62 +#ifdef DOXYGEN_IGNORE
  63 +
  64 +/** @mainpage sphlib C code documentation
  65 + *
  66 + * @section overview Overview
  67 + *
  68 + * <code>sphlib</code> is a library which contains implementations of
  69 + * various cryptographic hash functions. These pages have been generated
  70 + * with <a href="http://www.doxygen.org/index.html">doxygen</a> and
  71 + * document the API for the C implementations.
  72 + *
  73 + * The API is described in appropriate header files, which are available
  74 + * in the "Files" section. Each hash function family has its own header,
  75 + * whose name begins with <code>"sph_"</code> and contains the family
  76 + * name. For instance, the API for the RIPEMD hash functions is available
  77 + * in the header file <code>sph_ripemd.h</code>.
  78 + *
  79 + * @section principles API structure and conventions
  80 + *
  81 + * @subsection io Input/output conventions
  82 + *
  83 + * In all generality, hash functions operate over strings of bits.
  84 + * Individual bits are rarely encountered in C programming or actual
  85 + * communication protocols; most protocols converge on the ubiquitous
  86 + * "octet" which is a group of eight bits. Data is thus expressed as a
  87 + * stream of octets. The C programming language contains the notion of a
  88 + * "byte", which is a data unit managed under the type <code>"unsigned
  89 + * char"</code>. The C standard prescribes that a byte should hold at
  90 + * least eight bits, but possibly more. Most modern architectures, even
  91 + * in the embedded world, feature eight-bit bytes, i.e. map bytes to
  92 + * octets.
  93 + *
  94 + * Nevertheless, for some of the implemented hash functions, an extra
  95 + * API has been added, which allows the input of arbitrary sequences of
  96 + * bits: when the computation is about to be closed, 1 to 7 extra bits
  97 + * can be added. The functions for which this API is implemented include
  98 + * the SHA-2 functions and all SHA-3 candidates.
  99 + *
  100 + * <code>sphlib</code> defines hash functions which may hash octet streams,
  101 + * i.e. streams of bits where the number of bits is a multiple of eight.
  102 + * The data input functions in the <code>sphlib</code> API expect data
  103 + * as anonymous pointers (<code>"const void *"</code>) with a length
  104 + * (of type <code>"size_t"</code>) which gives the input data chunk length
  105 + * in bytes. A byte is assumed to be an octet; the <code>sph_types.h</code>
  106 + * header contains a compile-time test which prevents compilation on
  107 + * architectures where this property is not met.
  108 + *
  109 + * The hash function output is also converted into bytes. All currently
  110 + * implemented hash functions have an output width which is a multiple of
  111 + * eight, and this is likely to remain true for new designs.
  112 + *
  113 + * Most hash functions internally convert input data into 32-bit or 64-bit
  114 + * words, using either little-endian or big-endian conversion. The hash
  115 + * output also often consists of such words, which are encoded into output
  116 + * bytes with a similar endianness convention. Some hash functions have
  117 + * been only loosely specified on that subject; when necessary,
  118 + * <code>sphlib</code> has been tested against published "reference"
  119 + * implementations in order to use the same conventions.
  120 + *
  121 + * @subsection shortname Function short name
  122 + *
  123 + * Each implemented hash function has a "short name" which is used
  124 + * internally to derive the identifiers for the functions and context
  125 + * structures which the function uses. For instance, MD5 has the short
  126 + * name <code>"md5"</code>. Short names are listed in the next section,
  127 + * for the implemented hash functions. In subsequent sections, the
  128 + * short name will be assumed to be <code>"XXX"</code>: replace with the
  129 + * actual hash function name to get the C identifier.
  130 + *
  131 + * Note: some functions within the same family share the same core
  132 + * elements, such as update function or context structure. Correspondingly,
  133 + * some of the defined types or functions may actually be macros which
  134 + * transparently evaluate to another type or function name.
  135 + *
  136 + * @subsection context Context structure
  137 + *
  138 + * Each implemented hash function has its own context structure, available
  139 + * under the type name <code>"sph_XXX_context"</code> for the hash function
  140 + * with short name <code>"XXX"</code>. This structure holds all needed
  141 + * state for a running hash computation.
  142 + *
  143 + * The contents of these structures are meant to be opaque, and private
  144 + * to the implementation. However, these contents are specified in the
  145 + * header files so that application code which uses <code>sphlib</code>
  146 + * may access the size of those structures.
  147 + *
  148 + * The caller is responsible for allocating the context structure,
  149 + * whether by dynamic allocation (<code>malloc()</code> or equivalent),
  150 + * static allocation (a global permanent variable), as an automatic
  151 + * variable ("on the stack"), or by any other mean which ensures proper
  152 + * structure alignment. <code>sphlib</code> code performs no dynamic
  153 + * allocation by itself.
  154 + *
  155 + * The context must be initialized before use, using the
  156 + * <code>sph_XXX_init()</code> function. This function sets the context
  157 + * state to proper initial values for hashing.
  158 + *
  159 + * Since all state data is contained within the context structure,
  160 + * <code>sphlib</code> is thread-safe and reentrant: several hash
  161 + * computations may be performed in parallel, provided that they do not
  162 + * operate on the same context. Moreover, a running computation can be
  163 + * cloned by copying the context (with a simple <code>memcpy()</code>):
  164 + * the context and its clone are then independent and may be updated
  165 + * with new data and/or closed without interfering with each other.
  166 + * Similarly, a context structure can be moved in memory at will:
  167 + * context structures contain no pointer, in particular no pointer to
  168 + * themselves.
  169 + *
  170 + * @subsection dataio Data input
  171 + *
  172 + * Hashed data is input with the <code>sph_XXX()</code> function, which
  173 + * takes as parameters a pointer to the context, a pointer to the data
  174 + * to hash, and the number of data bytes to hash. The context is updated
  175 + * with the new data.
  176 + *
  177 + * Data can be input in one or several calls, with arbitrary input lengths.
  178 + * However, it is best, performance wise, to input data by relatively big
  179 + * chunks (say a few kilobytes), because this allows <code>sphlib</code> to
  180 + * optimize things and avoid internal copying.
  181 + *
  182 + * When all data has been input, the context can be closed with
  183 + * <code>sph_XXX_close()</code>. The hash output is computed and written
  184 + * into the provided buffer. The caller must take care to provide a
  185 + * buffer of appropriate length; e.g., when using SHA-1, the output is
  186 + * a 20-byte word, therefore the output buffer must be at least 20-byte
  187 + * long.
  188 + *
  189 + * For some hash functions, the <code>sph_XXX_addbits_and_close()</code>
  190 + * function can be used instead of <code>sph_XXX_close()</code>. This
  191 + * function can take a few extra <strong>bits</strong> to be added at
  192 + * the end of the input message. This allows hashing messages with a
  193 + * bit length which is not a multiple of 8. The extra bits are provided
  194 + * as an unsigned integer value, and a bit count. The bit count must be
  195 + * between 0 and 7, inclusive. The extra bits are provided as bits 7 to
  196 + * 0 (bits of numerical value 128, 64, 32... downto 0), in that order.
  197 + * For instance, to add three bits of value 1, 1 and 0, the unsigned
  198 + * integer will have value 192 (1*128 + 1*64 + 0*32) and the bit count
  199 + * will be 3.
  200 + *
  201 + * The <code>SPH_SIZE_XXX</code> macro is defined for each hash function;
  202 + * it evaluates to the function output size, expressed in bits. For instance,
  203 + * <code>SPH_SIZE_sha1</code> evaluates to <code>160</code>.
  204 + *
  205 + * When closed, the context is automatically reinitialized and can be
  206 + * immediately used for another computation. It is not necessary to call
  207 + * <code>sph_XXX_init()</code> after a close. Note that
  208 + * <code>sph_XXX_init()</code> can still be called to "reset" a context,
  209 + * i.e. forget previously input data, and get back to the initial state.
  210 + *
  211 + * @subsection alignment Data alignment
  212 + *
  213 + * "Alignment" is a property of data, which is said to be "properly
  214 + * aligned" when its emplacement in memory is such that the data can
  215 + * be optimally read by full words. This depends on the type of access;
  216 + * basically, some hash functions will read data by 32-bit or 64-bit
  217 + * words. <code>sphlib</code> does not mandate such alignment for input
  218 + * data, but using aligned data can substantially improve performance.
  219 + *
  220 + * As a rule, it is best to input data by chunks whose length (in bytes)
  221 + * is a multiple of eight, and which begins at "generally aligned"
  222 + * addresses, such as the base address returned by a call to
  223 + * <code>malloc()</code>.
  224 + *
  225 + * @section functions Implemented functions
  226 + *
  227 + * We give here the list of implemented functions. They are grouped by
  228 + * family; to each family corresponds a specific header file. Each
  229 + * individual function has its associated "short name". Please refer to
  230 + * the documentation for that header file to get details on the hash
  231 + * function denomination and provenance.
  232 + *
  233 + * Note: the functions marked with a '(64)' in the list below are
  234 + * available only if the C compiler provides an integer type of length
  235 + * 64 bits or more. Such a type is mandatory in the latest C standard
  236 + * (ISO 9899:1999, aka "C99") and is present in several older compilers
  237 + * as well, so chances are that such a type is available.
  238 + *
  239 + * - HAVAL family: file <code>sph_haval.h</code>
  240 + * - HAVAL-128/3 (128-bit, 3 passes): short name: <code>haval128_3</code>
  241 + * - HAVAL-128/4 (128-bit, 4 passes): short name: <code>haval128_4</code>
  242 + * - HAVAL-128/5 (128-bit, 5 passes): short name: <code>haval128_5</code>
  243 + * - HAVAL-160/3 (160-bit, 3 passes): short name: <code>haval160_3</code>
  244 + * - HAVAL-160/4 (160-bit, 4 passes): short name: <code>haval160_4</code>
  245 + * - HAVAL-160/5 (160-bit, 5 passes): short name: <code>haval160_5</code>
  246 + * - HAVAL-192/3 (192-bit, 3 passes): short name: <code>haval192_3</code>
  247 + * - HAVAL-192/4 (192-bit, 4 passes): short name: <code>haval192_4</code>
  248 + * - HAVAL-192/5 (192-bit, 5 passes): short name: <code>haval192_5</code>
  249 + * - HAVAL-224/3 (224-bit, 3 passes): short name: <code>haval224_3</code>
  250 + * - HAVAL-224/4 (224-bit, 4 passes): short name: <code>haval224_4</code>
  251 + * - HAVAL-224/5 (224-bit, 5 passes): short name: <code>haval224_5</code>
  252 + * - HAVAL-256/3 (256-bit, 3 passes): short name: <code>haval256_3</code>
  253 + * - HAVAL-256/4 (256-bit, 4 passes): short name: <code>haval256_4</code>
  254 + * - HAVAL-256/5 (256-bit, 5 passes): short name: <code>haval256_5</code>
  255 + * - MD2: file <code>sph_md2.h</code>, short name: <code>md2</code>
  256 + * - MD4: file <code>sph_md4.h</code>, short name: <code>md4</code>
  257 + * - MD5: file <code>sph_md5.h</code>, short name: <code>md5</code>
  258 + * - PANAMA: file <code>sph_panama.h</code>, short name: <code>panama</code>
  259 + * - RadioGatun family: file <code>sph_radiogatun.h</code>
  260 + * - RadioGatun[32]: short name: <code>radiogatun32</code>
  261 + * - RadioGatun[64]: short name: <code>radiogatun64</code> (64)
  262 + * - RIPEMD family: file <code>sph_ripemd.h</code>
  263 + * - RIPEMD: short name: <code>ripemd</code>
  264 + * - RIPEMD-128: short name: <code>ripemd128</code>
  265 + * - RIPEMD-160: short name: <code>ripemd160</code>
  266 + * - SHA-0: file <code>sph_sha0.h</code>, short name: <code>sha0</code>
  267 + * - SHA-1: file <code>sph_sha1.h</code>, short name: <code>sha1</code>
  268 + * - SHA-2 family, 32-bit hashes: file <code>sph_sha2.h</code>
  269 + * - SHA-224: short name: <code>sha224</code>
  270 + * - SHA-256: short name: <code>sha256</code>
  271 + * - SHA-384: short name: <code>sha384</code> (64)
  272 + * - SHA-512: short name: <code>sha512</code> (64)
  273 + * - Tiger family: file <code>sph_tiger.h</code>
  274 + * - Tiger: short name: <code>tiger</code> (64)
  275 + * - Tiger2: short name: <code>tiger2</code> (64)
  276 + * - WHIRLPOOL family: file <code>sph_whirlpool.h</code>
  277 + * - WHIRLPOOL-0: short name: <code>whirlpool0</code> (64)
  278 + * - WHIRLPOOL-1: short name: <code>whirlpool1</code> (64)
  279 + * - WHIRLPOOL: short name: <code>whirlpool</code> (64)
  280 + *
  281 + * The fourteen second-round SHA-3 candidates are also implemented;
  282 + * when applicable, the implementations follow the "final" specifications
  283 + * as published for the third round of the SHA-3 competition (BLAKE,
  284 + * Groestl, JH, Keccak and Skein have been tweaked for third round).
  285 + *
  286 + * - BLAKE family: file <code>sph_blake.h</code>
  287 + * - BLAKE-224: short name: <code>blake224</code>
  288 + * - BLAKE-256: short name: <code>blake256</code>
  289 + * - BLAKE-384: short name: <code>blake384</code>
  290 + * - BLAKE-512: short name: <code>blake512</code>
  291 + * - BMW (Blue Midnight Wish) family: file <code>sph_bmw.h</code>
  292 + * - BMW-224: short name: <code>bmw224</code>
  293 + * - BMW-256: short name: <code>bmw256</code>
  294 + * - BMW-384: short name: <code>bmw384</code> (64)
  295 + * - BMW-512: short name: <code>bmw512</code> (64)
  296 + * - CubeHash family: file <code>sph_cubehash.h</code> (specified as
  297 + * CubeHash16/32 in the CubeHash specification)
  298 + * - CubeHash-224: short name: <code>cubehash224</code>
  299 + * - CubeHash-256: short name: <code>cubehash256</code>
  300 + * - CubeHash-384: short name: <code>cubehash384</code>
  301 + * - CubeHash-512: short name: <code>cubehash512</code>
  302 + * - ECHO family: file <code>sph_echo.h</code>
  303 + * - ECHO-224: short name: <code>echo224</code>
  304 + * - ECHO-256: short name: <code>echo256</code>
  305 + * - ECHO-384: short name: <code>echo384</code>
  306 + * - ECHO-512: short name: <code>echo512</code>
  307 + * - Fugue family: file <code>sph_fugue.h</code>
  308 + * - Fugue-224: short name: <code>fugue224</code>
  309 + * - Fugue-256: short name: <code>fugue256</code>
  310 + * - Fugue-384: short name: <code>fugue384</code>
  311 + * - Fugue-512: short name: <code>fugue512</code>
  312 + * - Groestl family: file <code>sph_groestl.h</code>
  313 + * - Groestl-224: short name: <code>groestl224</code>
  314 + * - Groestl-256: short name: <code>groestl256</code>
  315 + * - Groestl-384: short name: <code>groestl384</code>
  316 + * - Groestl-512: short name: <code>groestl512</code>
  317 + * - Hamsi family: file <code>sph_hamsi.h</code>
  318 + * - Hamsi-224: short name: <code>hamsi224</code>
  319 + * - Hamsi-256: short name: <code>hamsi256</code>
  320 + * - Hamsi-384: short name: <code>hamsi384</code>
  321 + * - Hamsi-512: short name: <code>hamsi512</code>
  322 + * - JH family: file <code>sph_jh.h</code>
  323 + * - JH-224: short name: <code>jh224</code>
  324 + * - JH-256: short name: <code>jh256</code>
  325 + * - JH-384: short name: <code>jh384</code>
  326 + * - JH-512: short name: <code>jh512</code>
  327 + * - Keccak family: file <code>sph_keccak.h</code>
  328 + * - Keccak-224: short name: <code>keccak224</code>
  329 + * - Keccak-256: short name: <code>keccak256</code>
  330 + * - Keccak-384: short name: <code>keccak384</code>
  331 + * - Keccak-512: short name: <code>keccak512</code>
  332 + * - Luffa family: file <code>sph_luffa.h</code>
  333 + * - Luffa-224: short name: <code>luffa224</code>
  334 + * - Luffa-256: short name: <code>luffa256</code>
  335 + * - Luffa-384: short name: <code>luffa384</code>
  336 + * - Luffa-512: short name: <code>luffa512</code>
  337 + * - Shabal family: file <code>sph_shabal.h</code>
  338 + * - Shabal-192: short name: <code>shabal192</code>
  339 + * - Shabal-224: short name: <code>shabal224</code>
  340 + * - Shabal-256: short name: <code>shabal256</code>
  341 + * - Shabal-384: short name: <code>shabal384</code>
  342 + * - Shabal-512: short name: <code>shabal512</code>
  343 + * - SHAvite-3 family: file <code>sph_shavite.h</code>
  344 + *    - SHAvite-224 (nominally "SHAvite-3 with 224-bit output"):
  345 + *      short name: <code>shavite224</code>
  346 + *    - SHAvite-256 (nominally "SHAvite-3 with 256-bit output"):
  347 + *      short name: <code>shavite256</code>
  348 + *    - SHAvite-384 (nominally "SHAvite-3 with 384-bit output"):
  349 + *      short name: <code>shavite384</code>
  350 + *    - SHAvite-512 (nominally "SHAvite-3 with 512-bit output"):
  351 + *      short name: <code>shavite512</code>
  352 + * - SIMD family: file <code>sph_simd.h</code>
  353 + * - SIMD-224: short name: <code>simd224</code>
  354 + * - SIMD-256: short name: <code>simd256</code>
  355 + * - SIMD-384: short name: <code>simd384</code>
  356 + * - SIMD-512: short name: <code>simd512</code>
  357 + * - Skein family: file <code>sph_skein.h</code>
  358 + * - Skein-224 (nominally specified as Skein-512-224): short name:
  359 + * <code>skein224</code> (64)
  360 + * - Skein-256 (nominally specified as Skein-512-256): short name:
  361 + * <code>skein256</code> (64)
  362 + * - Skein-384 (nominally specified as Skein-512-384): short name:
  363 + * <code>skein384</code> (64)
  364 + * - Skein-512 (nominally specified as Skein-512-512): short name:
  365 + * <code>skein512</code> (64)
  366 + *
  367 + * For the second-round SHA-3 candidates, the functions are as specified
  368 + * for round 2, i.e. with the "tweaks" that some candidates added
  369 + * between round 1 and round 2. Also, some of the submitted packages for
  370 + * round 2 contained errors, in the specification, reference code, or
  371 + * both. <code>sphlib</code> implements the corrected versions.
  372 + */
  373 +
  374 +/** @hideinitializer
  375 + * Unsigned integer type whose length is at least 32 bits; on most
  376 + * architectures, it will have a width of exactly 32 bits. Unsigned C
  377 + * types implement arithmetics modulo a power of 2; use the
  378 + * <code>SPH_T32()</code> macro to ensure that the value is truncated
  379 + * to exactly 32 bits. Unless otherwise specified, all macros and
  380 + * functions which accept <code>sph_u32</code> values assume that these
  381 + * values fit on 32 bits, i.e. do not exceed 2^32-1, even on architectures
  382 + * where <code>sph_u32</code> is larger than that.
  383 + */
  384 +typedef __arch_dependant__ sph_u32;
  385 +
  386 +/** @hideinitializer
  387 + * Signed integer type corresponding to <code>sph_u32</code>; it has
  388 + * width 32 bits or more.
  389 + */
  390 +typedef __arch_dependant__ sph_s32;
  391 +
  392 +/** @hideinitializer
  393 + * Unsigned integer type whose length is at least 64 bits; on most
  394 + * architectures which feature such a type, it will have a width of
  395 + * exactly 64 bits. C99-compliant platform will have this type; it
  396 + * is also defined when the GNU compiler (gcc) is used, and on
  397 + * platforms where <code>unsigned long</code> is large enough. If this
  398 + * type is not available, then some hash functions which depends on
  399 + * a 64-bit type will not be available (most notably SHA-384, SHA-512,
  400 + * Tiger and WHIRLPOOL).
  401 + */
  402 +typedef __arch_dependant__ sph_u64;
  403 +
  404 +/** @hideinitializer
  405 + * Signed integer type corresponding to <code>sph_u64</code>; it has
  406 + * width 64 bits or more.
  407 + */
  408 +typedef __arch_dependant__ sph_s64;
  409 +
  410 +/**
  411 + * This macro expands the token <code>x</code> into a suitable
  412 + * constant expression of type <code>sph_u32</code>. Depending on
  413 + * how this type is defined, a suffix such as <code>UL</code> may
  414 + * be appended to the argument.
  415 + *
  416 + * @param x the token to expand into a suitable constant expression
  417 + */
  418 +#define SPH_C32(x)
  419 +
  420 +/**
  421 + * Truncate a 32-bit value to exactly 32 bits. On most systems, this is
  422 + * a no-op, recognized as such by the compiler.
  423 + *
  424 + * @param x the value to truncate (of type <code>sph_u32</code>)
  425 + */
  426 +#define SPH_T32(x)
  427 +
  428 +/**
  429 + * Rotate a 32-bit value by a number of bits to the left. The rotate
  430 + * count must reside between 1 and 31. This macro assumes that its
  431 + * first argument fits in 32 bits (no extra bit allowed on machines where
  432 + * <code>sph_u32</code> is wider); both arguments may be evaluated
  433 + * several times.
  434 + *
  435 + * @param x the value to rotate (of type <code>sph_u32</code>)
  436 + * @param n the rotation count (between 1 and 31, inclusive)
  437 + */
  438 +#define SPH_ROTL32(x, n)
  439 +
  440 +/**
  441 + * Rotate a 32-bit value by a number of bits to the left. The rotate
  442 + * count must reside between 1 and 31. This macro assumes that its
  443 + * first argument fits in 32 bits (no extra bit allowed on machines where
  444 + * <code>sph_u32</code> is wider); both arguments may be evaluated
  445 + * several times.
  446 + *
  447 + * @param x the value to rotate (of type <code>sph_u32</code>)
  448 + * @param n the rotation count (between 1 and 31, inclusive)
  449 + */
  450 +#define SPH_ROTR32(x, n)
  451 +
  452 +/**
  453 + * This macro is defined on systems for which a 64-bit type has been
  454 + * detected, and is used for <code>sph_u64</code>.
  455 + */
  456 +#define SPH_64
  457 +
  458 +/**
  459 + * This macro is defined on systems for the "native" integer size is
  460 + * 64 bits (64-bit values fit in one register).
  461 + */
  462 +#define SPH_64_TRUE
  463 +
  464 +/**
  465 + * This macro expands the token <code>x</code> into a suitable
  466 + * constant expression of type <code>sph_u64</code>. Depending on
  467 + * how this type is defined, a suffix such as <code>ULL</code> may
  468 + * be appended to the argument. This macro is defined only if a
  469 + * 64-bit type was detected and used for <code>sph_u64</code>.
  470 + *
  471 + * @param x the token to expand into a suitable constant expression
  472 + */
  473 +#define SPH_C64(x)
  474 +
  475 +/**
  476 + * Truncate a 64-bit value to exactly 64 bits. On most systems, this is
  477 + * a no-op, recognized as such by the compiler. This macro is defined only
  478 + * if a 64-bit type was detected and used for <code>sph_u64</code>.
  479 + *
  480 + * @param x the value to truncate (of type <code>sph_u64</code>)
  481 + */
  482 +#define SPH_T64(x)
  483 +
  484 +/**
  485 + * Rotate a 64-bit value by a number of bits to the left. The rotate
  486 + * count must reside between 1 and 63. This macro assumes that its
  487 + * first argument fits in 64 bits (no extra bit allowed on machines where
  488 + * <code>sph_u64</code> is wider); both arguments may be evaluated
  489 + * several times. This macro is defined only if a 64-bit type was detected
  490 + * and used for <code>sph_u64</code>.
  491 + *
  492 + * @param x the value to rotate (of type <code>sph_u64</code>)
  493 + * @param n the rotation count (between 1 and 63, inclusive)
  494 + */
  495 +#define SPH_ROTL64(x, n)
  496 +
  497 +/**
  498 + * Rotate a 64-bit value by a number of bits to the left. The rotate
  499 + * count must reside between 1 and 63. This macro assumes that its
  500 + * first argument fits in 64 bits (no extra bit allowed on machines where
  501 + * <code>sph_u64</code> is wider); both arguments may be evaluated
  502 + * several times. This macro is defined only if a 64-bit type was detected
  503 + * and used for <code>sph_u64</code>.
  504 + *
  505 + * @param x the value to rotate (of type <code>sph_u64</code>)
  506 + * @param n the rotation count (between 1 and 63, inclusive)
  507 + */
  508 +#define SPH_ROTR64(x, n)
  509 +
  510 +/**
  511 + * This macro evaluates to <code>inline</code> or an equivalent construction,
  512 + * if available on the compilation platform, or to nothing otherwise. This
  513 + * is used to declare inline functions, for which the compiler should
  514 + * endeavour to include the code directly in the caller. Inline functions
  515 + * are typically defined in header files as replacement for macros.
  516 + */
  517 +#define SPH_INLINE
  518 +
  519 +/**
  520 + * This macro is defined if the platform has been detected as using
  521 + * little-endian convention. This implies that the <code>sph_u32</code>
  522 + * type (and the <code>sph_u64</code> type also, if it is defined) has
  523 + * an exact width (i.e. exactly 32-bit, respectively 64-bit).
  524 + */
  525 +#define SPH_LITTLE_ENDIAN
  526 +
  527 +/**
  528 + * This macro is defined if the platform has been detected as using
  529 + * big-endian convention. This implies that the <code>sph_u32</code>
  530 + * type (and the <code>sph_u64</code> type also, if it is defined) has
  531 + * an exact width (i.e. exactly 32-bit, respectively 64-bit).
  532 + */
  533 +#define SPH_BIG_ENDIAN
  534 +
  535 +/**
  536 + * This macro is defined if 32-bit words (and 64-bit words, if defined)
  537 + * can be read from and written to memory efficiently in little-endian
  538 + * convention. This is the case for little-endian platforms, and also
  539 + * for the big-endian platforms which have special little-endian access
  540 + * opcodes (e.g. Ultrasparc).
  541 + */
  542 +#define SPH_LITTLE_FAST
  543 +
  544 +/**
  545 + * This macro is defined if 32-bit words (and 64-bit words, if defined)
  546 + * can be read from and written to memory efficiently in big-endian
  547 + * convention. This is the case for big-endian platforms, and also
  548 + * for the little-endian platforms which have special big-endian access
  549 + * opcodes.
  550 + */
  551 +#define SPH_BIG_FAST
  552 +
  553 +/**
  554 + * On some platforms, this macro is defined to an unsigned integer type
  555 + * into which pointer values may be cast. The resulting value can then
  556 + * be tested for being a multiple of 2, 4 or 8, indicating an aligned
  557 + * pointer for, respectively, 16-bit, 32-bit or 64-bit memory accesses.
  558 + */
  559 +#define SPH_UPTR
  560 +
  561 +/**
  562 + * When defined, this macro indicates that unaligned memory accesses
  563 + * are possible with only a minor penalty, and thus should be preferred
  564 + * over strategies which first copy data to an aligned buffer.
  565 + */
  566 +#define SPH_UNALIGNED
  567 +
  568 +/**
  569 + * Byte-swap a 32-bit word (i.e. <code>0x12345678</code> becomes
  570 + * <code>0x78563412</code>). This is an inline function which resorts
  571 + * to inline assembly on some platforms, for better performance.
  572 + *
  573 + * @param x the 32-bit value to byte-swap
  574 + * @return the byte-swapped value
  575 + */
  576 +static inline sph_u32 sph_bswap32(sph_u32 x);
  577 +
  578 +/**
  579 + * Byte-swap a 64-bit word. This is an inline function which resorts
  580 + * to inline assembly on some platforms, for better performance. This
  581 + * function is defined only if a suitable 64-bit type was found for
  582 + * <code>sph_u64</code>
  583 + *
  584 + * @param x the 64-bit value to byte-swap
  585 + * @return the byte-swapped value
  586 + */
  587 +static inline sph_u64 sph_bswap64(sph_u64 x);
  588 +
  589 +/**
  590 + * Decode a 16-bit unsigned value from memory, in little-endian convention
  591 + * (least significant byte comes first).
  592 + *
  593 + * @param src the source address
  594 + * @return the decoded value
  595 + */
  596 +static inline unsigned sph_dec16le(const void *src);
  597 +
  598 +/**
  599 + * Encode a 16-bit unsigned value into memory, in little-endian convention
  600 + * (least significant byte comes first).
  601 + *
  602 + * @param dst the destination buffer
  603 + * @param val the value to encode
  604 + */
  605 +static inline void sph_enc16le(void *dst, unsigned val);
  606 +
  607 +/**
  608 + * Decode a 16-bit unsigned value from memory, in big-endian convention
  609 + * (most significant byte comes first).
  610 + *
  611 + * @param src the source address
  612 + * @return the decoded value
  613 + */
  614 +static inline unsigned sph_dec16be(const void *src);
  615 +
  616 +/**
  617 + * Encode a 16-bit unsigned value into memory, in big-endian convention
  618 + * (most significant byte comes first).
  619 + *
  620 + * @param dst the destination buffer
  621 + * @param val the value to encode
  622 + */
  623 +static inline void sph_enc16be(void *dst, unsigned val);
  624 +
  625 +/**
  626 + * Decode a 32-bit unsigned value from memory, in little-endian convention
  627 + * (least significant byte comes first).
  628 + *
  629 + * @param src the source address
  630 + * @return the decoded value
  631 + */
  632 +static inline sph_u32 sph_dec32le(const void *src);
  633 +
  634 +/**
  635 + * Decode a 32-bit unsigned value from memory, in little-endian convention
  636 + * (least significant byte comes first). This function assumes that the
  637 + * source address is suitably aligned for a direct access, if the platform
  638 + * supports such things; it can thus be marginally faster than the generic
  639 + * <code>sph_dec32le()</code> function.
  640 + *
  641 + * @param src the source address
  642 + * @return the decoded value
  643 + */
  644 +static inline sph_u32 sph_dec32le_aligned(const void *src);
  645 +
  646 +/**
  647 + * Encode a 32-bit unsigned value into memory, in little-endian convention
  648 + * (least significant byte comes first).
  649 + *
  650 + * @param dst the destination buffer
  651 + * @param val the value to encode
  652 + */
  653 +static inline void sph_enc32le(void *dst, sph_u32 val);
  654 +
  655 +/**
  656 + * Encode a 32-bit unsigned value into memory, in little-endian convention
  657 + * (least significant byte comes first). This function assumes that the
  658 + * destination address is suitably aligned for a direct access, if the
  659 + * platform supports such things; it can thus be marginally faster than
  660 + * the generic <code>sph_enc32le()</code> function.
  661 + *
  662 + * @param dst the destination buffer
  663 + * @param val the value to encode
  664 + */
  665 +static inline void sph_enc32le_aligned(void *dst, sph_u32 val);
  666 +
  667 +/**
  668 + * Decode a 32-bit unsigned value from memory, in big-endian convention
  669 + * (most significant byte comes first).
  670 + *
  671 + * @param src the source address
  672 + * @return the decoded value
  673 + */
  674 +static inline sph_u32 sph_dec32be(const void *src);
  675 +
  676 +/**
  677 + * Decode a 32-bit unsigned value from memory, in big-endian convention
  678 + * (most significant byte comes first). This function assumes that the
  679 + * source address is suitably aligned for a direct access, if the platform
  680 + * supports such things; it can thus be marginally faster than the generic
  681 + * <code>sph_dec32be()</code> function.
  682 + *
  683 + * @param src the source address
  684 + * @return the decoded value
  685 + */
  686 +static inline sph_u32 sph_dec32be_aligned(const void *src);
  687 +
  688 +/**
  689 + * Encode a 32-bit unsigned value into memory, in big-endian convention
  690 + * (most significant byte comes first).
  691 + *
  692 + * @param dst the destination buffer
  693 + * @param val the value to encode
  694 + */
  695 +static inline void sph_enc32be(void *dst, sph_u32 val);
  696 +
  697 +/**
  698 + * Encode a 32-bit unsigned value into memory, in big-endian convention
  699 + * (most significant byte comes first). This function assumes that the
  700 + * destination address is suitably aligned for a direct access, if the
  701 + * platform supports such things; it can thus be marginally faster than
  702 + * the generic <code>sph_enc32be()</code> function.
  703 + *
  704 + * @param dst the destination buffer
  705 + * @param val the value to encode
  706 + */
  707 +static inline void sph_enc32be_aligned(void *dst, sph_u32 val);
  708 +
  709 +/**
  710 + * Decode a 64-bit unsigned value from memory, in little-endian convention
  711 + * (least significant byte comes first). This function is defined only
  712 + * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
  713 + *
  714 + * @param src the source address
  715 + * @return the decoded value
  716 + */
  717 +static inline sph_u64 sph_dec64le(const void *src);
  718 +
  719 +/**
  720 + * Decode a 64-bit unsigned value from memory, in little-endian convention
  721 + * (least significant byte comes first). This function assumes that the
  722 + * source address is suitably aligned for a direct access, if the platform
  723 + * supports such things; it can thus be marginally faster than the generic
  724 + * <code>sph_dec64le()</code> function. This function is defined only
  725 + * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
  726 + *
  727 + * @param src the source address
  728 + * @return the decoded value
  729 + */
  730 +static inline sph_u64 sph_dec64le_aligned(const void *src);
  731 +
  732 +/**
  733 + * Encode a 64-bit unsigned value into memory, in little-endian convention
  734 + * (least significant byte comes first). This function is defined only
  735 + * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
  736 + *
  737 + * @param dst the destination buffer
  738 + * @param val the value to encode
  739 + */
  740 +static inline void sph_enc64le(void *dst, sph_u64 val);
  741 +
  742 +/**
  743 + * Encode a 64-bit unsigned value into memory, in little-endian convention
  744 + * (least significant byte comes first). This function assumes that the
  745 + * destination address is suitably aligned for a direct access, if the
  746 + * platform supports such things; it can thus be marginally faster than
  747 + * the generic <code>sph_enc64le()</code> function. This function is defined
  748 + * only if a suitable 64-bit type was detected and used for
  749 + * <code>sph_u64</code>.
  750 + *
  751 + * @param dst the destination buffer
  752 + * @param val the value to encode
  753 + */
  754 +static inline void sph_enc64le_aligned(void *dst, sph_u64 val);
  755 +
  756 +/**
  757 + * Decode a 64-bit unsigned value from memory, in big-endian convention
  758 + * (most significant byte comes first). This function is defined only
  759 + * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
  760 + *
  761 + * @param src the source address
  762 + * @return the decoded value
  763 + */
  764 +static inline sph_u64 sph_dec64be(const void *src);
  765 +
  766 +/**
  767 + * Decode a 64-bit unsigned value from memory, in big-endian convention
  768 + * (most significant byte comes first). This function assumes that the
  769 + * source address is suitably aligned for a direct access, if the platform
  770 + * supports such things; it can thus be marginally faster than the generic
  771 + * <code>sph_dec64be()</code> function. This function is defined only
  772 + * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
  773 + *
  774 + * @param src the source address
  775 + * @return the decoded value
  776 + */
  777 +static inline sph_u64 sph_dec64be_aligned(const void *src);
  778 +
  779 +/**
  780 + * Encode a 64-bit unsigned value into memory, in big-endian convention
  781 + * (most significant byte comes first). This function is defined only
  782 + * if a suitable 64-bit type was detected and used for <code>sph_u64</code>.
  783 + *
  784 + * @param dst the destination buffer
  785 + * @param val the value to encode
  786 + */
  787 +static inline void sph_enc64be(void *dst, sph_u64 val);
  788 +
  789 +/**
  790 + * Encode a 64-bit unsigned value into memory, in big-endian convention
  791 + * (most significant byte comes first). This function assumes that the
  792 + * destination address is suitably aligned for a direct access, if the
  793 + * platform supports such things; it can thus be marginally faster than
  794 + * the generic <code>sph_enc64be()</code> function. This function is defined
  795 + * only if a suitable 64-bit type was detected and used for
  796 + * <code>sph_u64</code>.
  797 + *
  798 + * @param dst the destination buffer
  799 + * @param val the value to encode
  800 + */
  801 +static inline void sph_enc64be_aligned(void *dst, sph_u64 val);
  802 +
  803 +#endif
  804 +
  805 +/* ============== END documentation block for Doxygen ============= */
  806 +
  807 +#ifndef DOXYGEN_IGNORE
  808 +
  809 +/*
  810 + * We want to define the types "sph_u32" and "sph_u64" which hold
  811 + * unsigned values of at least, respectively, 32 and 64 bits. These
  812 + * tests should select appropriate types for most platforms. The
  813 + * macro "SPH_64" is defined if a 64-bit type is supported.
  814 + */
  815 +
  816 +#undef SPH_64
  817 +#undef SPH_64_TRUE
  818 +
  819 +#if defined __STDC__ && __STDC_VERSION__ >= 199901L
  820 +
  821 +/*
  822 + * On C99 implementations, we can use <stdint.h> to get an exact 64-bit
  823 + * type, if any, or otherwise use a wider type (which must exist, for
  824 + * C99 conformance).
  825 + */
  826 +
  827 +#include <stdint.h>
  828 +
  829 +#ifdef UINT32_MAX
  830 +typedef uint32_t sph_u32;
  831 +typedef int32_t sph_s32;
  832 +#else
  833 +typedef uint_fast32_t sph_u32;
  834 +typedef int_fast32_t sph_s32;
  835 +#endif
  836 +#if !SPH_NO_64
  837 +#ifdef UINT64_MAX
  838 +typedef uint64_t sph_u64;
  839 +typedef int64_t sph_s64;
  840 +#else
  841 +typedef uint_fast64_t sph_u64;
  842 +typedef int_fast64_t sph_s64;
  843 +#endif
  844 +#endif
  845 +
  846 +#define SPH_C32(x) ((sph_u32)(x))
  847 +#if !SPH_NO_64
  848 +#define SPH_C64(x) ((sph_u64)(x))
  849 +#define SPH_64 1
  850 +#endif
  851 +
  852 +#else
  853 +
  854 +/*
  855 + * On non-C99 systems, we use "unsigned int" if it is wide enough,
  856 + * "unsigned long" otherwise. This supports all "reasonable" architectures.
  857 + * We have to be cautious: pre-C99 preprocessors handle constants
  858 + * differently in '#if' expressions. Hence the shifts to test UINT_MAX.
  859 + */
  860 +
  861 +#if ((UINT_MAX >> 11) >> 11) >= 0x3FF
  862 +
  863 +typedef unsigned int sph_u32;
  864 +typedef int sph_s32;
  865 +
  866 +#define SPH_C32(x) ((sph_u32)(x ## U))
  867 +
  868 +#else
  869 +
  870 +typedef unsigned long sph_u32;
  871 +typedef long sph_s32;
  872 +
  873 +#define SPH_C32(x) ((sph_u32)(x ## UL))
  874 +
  875 +#endif
  876 +
  877 +#if !SPH_NO_64
  878 +
  879 +/*
  880 + * We want a 64-bit type. We use "unsigned long" if it is wide enough (as
  881 + * is common on 64-bit architectures such as AMD64, Alpha or Sparcv9),
  882 + * "unsigned long long" otherwise, if available. We use ULLONG_MAX to
  883 + * test whether "unsigned long long" is available; we also know that
  884 + * gcc features this type, even if the libc headers do not know it.
  885 + */
  886 +
  887 +#if ((ULONG_MAX >> 31) >> 31) >= 3
  888 +
  889 +typedef unsigned long sph_u64;
  890 +typedef long sph_s64;
  891 +
  892 +#define SPH_C64(x) ((sph_u64)(x ## UL))
  893 +
  894 +#define SPH_64 1
  895 +
  896 +#elif ((ULLONG_MAX >> 31) >> 31) >= 3 || defined __GNUC__
  897 +
  898 +typedef unsigned long long sph_u64;
  899 +typedef long long sph_s64;
  900 +
  901 +#define SPH_C64(x) ((sph_u64)(x ## ULL))
  902 +
  903 +#define SPH_64 1
  904 +
  905 +#else
  906 +
  907 +/*
  908 + * No 64-bit type...
  909 + */
  910 +
  911 +#endif
  912 +
  913 +#endif
  914 +
  915 +#endif
  916 +
  917 +/*
  918 + * If the "unsigned long" type has length 64 bits or more, then this is
  919 + * a "true" 64-bit architecture. This is also true with Visual C on
  920 + * amd64, even though the "long" type is limited to 32 bits.
  921 + */
  922 +#if SPH_64 && (((ULONG_MAX >> 31) >> 31) >= 3 || defined _M_X64)
  923 +#define SPH_64_TRUE 1
  924 +#endif
  925 +
  926 +/*
  927 + * Implementation note: some processors have specific opcodes to perform
  928 + * a rotation. Recent versions of gcc recognize the expressions below and
  929 + * use the relevant opcodes, when appropriate.
  930 + */
  931 +
  932 +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF))
  933 +#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
  934 +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n)))
  935 +
  936 +#if SPH_64
  937 +
  938 +#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
  939 +#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
  940 +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n)))
  941 +
  942 +#endif
  943 +
  944 +#ifndef DOXYGEN_IGNORE
  945 +/*
  946 + * Define SPH_INLINE to be an "inline" qualifier, if available. We define
  947 + * some small macro-like functions which benefit greatly from being inlined.
  948 + */
  949 +#if (defined __STDC__ && __STDC_VERSION__ >= 199901L) || defined __GNUC__
  950 +#define SPH_INLINE inline
  951 +#elif defined _MSC_VER
  952 +#define SPH_INLINE __inline
  953 +#else
  954 +#define SPH_INLINE
  955 +#endif
  956 +#endif
  957 +
  958 +/*
  959 + * We define some macros which qualify the architecture. These macros
  960 + * may be explicit set externally (e.g. as compiler parameters). The
  961 + * code below sets those macros if they are not already defined.
  962 + *
  963 + * Most macros are boolean, thus evaluate to either zero or non-zero.
  964 + * The SPH_UPTR macro is special, in that it evaluates to a C type,
  965 + * or is not defined.
  966 + *
  967 + * SPH_UPTR if defined: unsigned type to cast pointers into
  968 + *
  969 + * SPH_UNALIGNED non-zero if unaligned accesses are efficient
  970 + * SPH_LITTLE_ENDIAN non-zero if architecture is known to be little-endian
  971 + * SPH_BIG_ENDIAN non-zero if architecture is known to be big-endian
  972 + * SPH_LITTLE_FAST non-zero if little-endian decoding is fast
  973 + * SPH_BIG_FAST non-zero if big-endian decoding is fast
  974 + *
  975 + * If SPH_UPTR is defined, then encoding and decoding of 32-bit and 64-bit
  976 + * values will try to be "smart". Either SPH_LITTLE_ENDIAN or SPH_BIG_ENDIAN
  977 + * _must_ be non-zero in those situations. The 32-bit and 64-bit types
  978 + * _must_ also have an exact width.
  979 + *
  980 + * SPH_SPARCV9_GCC_32 UltraSPARC-compatible with gcc, 32-bit mode
  981 + * SPH_SPARCV9_GCC_64 UltraSPARC-compatible with gcc, 64-bit mode
  982 + * SPH_SPARCV9_GCC UltraSPARC-compatible with gcc
  983 + * SPH_I386_GCC x86-compatible (32-bit) with gcc
  984 + * SPH_I386_MSVC x86-compatible (32-bit) with Microsoft Visual C
  985 + * SPH_AMD64_GCC x86-compatible (64-bit) with gcc
  986 + * SPH_AMD64_MSVC x86-compatible (64-bit) with Microsoft Visual C
  987 + * SPH_PPC32_GCC PowerPC, 32-bit, with gcc
  988 + * SPH_PPC64_GCC PowerPC, 64-bit, with gcc
  989 + *
  990 + * TODO: enhance automatic detection, for more architectures and compilers.
  991 + * Endianness is the most important. SPH_UNALIGNED and SPH_UPTR help with
  992 + * some very fast functions (e.g. MD4) when using unaligned input data.
  993 + * The CPU-specific-with-GCC macros are useful only for inline assembly,
  994 + * normally restrained to this header file.
  995 + */
  996 +
  997 +/*
  998 + * 32-bit x86, aka "i386 compatible".
  999 + */
  1000 +#if defined __i386__ || defined _M_IX86
  1001 +
  1002 +#define SPH_DETECT_UNALIGNED 1
  1003 +#define SPH_DETECT_LITTLE_ENDIAN 1
  1004 +#define SPH_DETECT_UPTR sph_u32
  1005 +#ifdef __GNUC__
  1006 +#define SPH_DETECT_I386_GCC 1
  1007 +#endif
  1008 +#ifdef _MSC_VER
  1009 +#define SPH_DETECT_I386_MSVC 1
  1010 +#endif
  1011 +
  1012 +/*
  1013 + * 64-bit x86, hereafter known as "amd64".
  1014 + */
  1015 +#elif defined __x86_64 || defined _M_X64
  1016 +
  1017 +#define SPH_DETECT_UNALIGNED 1
  1018 +#define SPH_DETECT_LITTLE_ENDIAN 1
  1019 +#define SPH_DETECT_UPTR sph_u64
  1020 +#ifdef __GNUC__
  1021 +#define SPH_DETECT_AMD64_GCC 1
  1022 +#endif
  1023 +#ifdef _MSC_VER
  1024 +#define SPH_DETECT_AMD64_MSVC 1
  1025 +#endif
  1026 +
  1027 +/*
  1028 + * 64-bit Sparc architecture (implies v9).
  1029 + */
  1030 +#elif ((defined __sparc__ || defined __sparc) && defined __arch64__) \
  1031 + || defined __sparcv9
  1032 +
  1033 +#define SPH_DETECT_BIG_ENDIAN 1
  1034 +#define SPH_DETECT_UPTR sph_u64
  1035 +#ifdef __GNUC__
  1036 +#define SPH_DETECT_SPARCV9_GCC_64 1
  1037 +#define SPH_DETECT_LITTLE_FAST 1
  1038 +#endif
  1039 +
  1040 +/*
  1041 + * 32-bit Sparc.
  1042 + */
  1043 +#elif (defined __sparc__ || defined __sparc) \
  1044 + && !(defined __sparcv9 || defined __arch64__)
  1045 +
  1046 +#define SPH_DETECT_BIG_ENDIAN 1
  1047 +#define SPH_DETECT_UPTR sph_u32
  1048 +#if defined __GNUC__ && defined __sparc_v9__
  1049 +#define SPH_DETECT_SPARCV9_GCC_32 1
  1050 +#define SPH_DETECT_LITTLE_FAST 1
  1051 +#endif
  1052 +
  1053 +/*
  1054 + * ARM, little-endian.
  1055 + */
  1056 +#elif defined __arm__ && __ARMEL__
  1057 +
  1058 +#define SPH_DETECT_LITTLE_ENDIAN 1
  1059 +
  1060 +/*
  1061 + * MIPS, little-endian.
  1062 + */
  1063 +#elif MIPSEL || _MIPSEL || __MIPSEL || __MIPSEL__
  1064 +
  1065 +#define SPH_DETECT_LITTLE_ENDIAN 1
  1066 +
  1067 +/*
  1068 + * MIPS, big-endian.
  1069 + */
  1070 +#elif MIPSEB || _MIPSEB || __MIPSEB || __MIPSEB__
  1071 +
  1072 +#define SPH_DETECT_BIG_ENDIAN 1
  1073 +
  1074 +/*
  1075 + * PowerPC.
  1076 + */
  1077 +#elif defined __powerpc__ || defined __POWERPC__ || defined __ppc__ \
  1078 + || defined _ARCH_PPC
  1079 +
  1080 +/*
  1081 + * Note: we do not declare cross-endian access to be "fast": even if
  1082 + * using inline assembly, implementation should still assume that
  1083 + * keeping the decoded word in a temporary is faster than decoding
  1084 + * it again.
  1085 + */
  1086 +#if defined __GNUC__
  1087 +#if SPH_64_TRUE
  1088 +#define SPH_DETECT_PPC64_GCC 1
  1089 +#else
  1090 +#define SPH_DETECT_PPC32_GCC 1
  1091 +#endif
  1092 +#endif
  1093 +
  1094 +#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
  1095 +#define SPH_DETECT_BIG_ENDIAN 1
  1096 +#elif defined __LITTLE_ENDIAN__ || defined _LITTLE_ENDIAN
  1097 +#define SPH_DETECT_LITTLE_ENDIAN 1
  1098 +#endif
  1099 +
  1100 +/*
  1101 + * Itanium, 64-bit.
  1102 + */
  1103 +#elif defined __ia64 || defined __ia64__ \
  1104 + || defined __itanium__ || defined _M_IA64
  1105 +
  1106 +#if defined __BIG_ENDIAN__ || defined _BIG_ENDIAN
  1107 +#define SPH_DETECT_BIG_ENDIAN 1
  1108 +#else
  1109 +#define SPH_DETECT_LITTLE_ENDIAN 1
  1110 +#endif
  1111 +#if defined __LP64__ || defined _LP64
  1112 +#define SPH_DETECT_UPTR sph_u64
  1113 +#else
  1114 +#define SPH_DETECT_UPTR sph_u32
  1115 +#endif
  1116 +
  1117 +#endif
  1118 +
  1119 +#if defined SPH_DETECT_SPARCV9_GCC_32 || defined SPH_DETECT_SPARCV9_GCC_64
  1120 +#define SPH_DETECT_SPARCV9_GCC 1
  1121 +#endif
  1122 +
  1123 +#if defined SPH_DETECT_UNALIGNED && !defined SPH_UNALIGNED
  1124 +#define SPH_UNALIGNED SPH_DETECT_UNALIGNED
  1125 +#endif
  1126 +#if defined SPH_DETECT_UPTR && !defined SPH_UPTR
  1127 +#define SPH_UPTR SPH_DETECT_UPTR
  1128 +#endif
  1129 +#if defined SPH_DETECT_LITTLE_ENDIAN && !defined SPH_LITTLE_ENDIAN
  1130 +#define SPH_LITTLE_ENDIAN SPH_DETECT_LITTLE_ENDIAN
  1131 +#endif
  1132 +#if defined SPH_DETECT_BIG_ENDIAN && !defined SPH_BIG_ENDIAN
  1133 +#define SPH_BIG_ENDIAN SPH_DETECT_BIG_ENDIAN
  1134 +#endif
  1135 +#if defined SPH_DETECT_LITTLE_FAST && !defined SPH_LITTLE_FAST
  1136 +#define SPH_LITTLE_FAST SPH_DETECT_LITTLE_FAST
  1137 +#endif
  1138 +#if defined SPH_DETECT_BIG_FAST && !defined SPH_BIG_FAST
  1139 +#define SPH_BIG_FAST SPH_DETECT_BIG_FAST
  1140 +#endif
  1141 +#if defined SPH_DETECT_SPARCV9_GCC_32 && !defined SPH_SPARCV9_GCC_32
  1142 +#define SPH_SPARCV9_GCC_32 SPH_DETECT_SPARCV9_GCC_32
  1143 +#endif
  1144 +#if defined SPH_DETECT_SPARCV9_GCC_64 && !defined SPH_SPARCV9_GCC_64
  1145 +#define SPH_SPARCV9_GCC_64 SPH_DETECT_SPARCV9_GCC_64
  1146 +#endif
  1147 +#if defined SPH_DETECT_SPARCV9_GCC && !defined SPH_SPARCV9_GCC
  1148 +#define SPH_SPARCV9_GCC SPH_DETECT_SPARCV9_GCC
  1149 +#endif
  1150 +#if defined SPH_DETECT_I386_GCC && !defined SPH_I386_GCC
  1151 +#define SPH_I386_GCC SPH_DETECT_I386_GCC
  1152 +#endif
  1153 +#if defined SPH_DETECT_I386_MSVC && !defined SPH_I386_MSVC
  1154 +#define SPH_I386_MSVC SPH_DETECT_I386_MSVC
  1155 +#endif
  1156 +#if defined SPH_DETECT_AMD64_GCC && !defined SPH_AMD64_GCC
  1157 +#define SPH_AMD64_GCC SPH_DETECT_AMD64_GCC
  1158 +#endif
  1159 +#if defined SPH_DETECT_AMD64_MSVC && !defined SPH_AMD64_MSVC
  1160 +#define SPH_AMD64_MSVC SPH_DETECT_AMD64_MSVC
  1161 +#endif
  1162 +#if defined SPH_DETECT_PPC32_GCC && !defined SPH_PPC32_GCC
  1163 +#define SPH_PPC32_GCC SPH_DETECT_PPC32_GCC
  1164 +#endif
  1165 +#if defined SPH_DETECT_PPC64_GCC && !defined SPH_PPC64_GCC
  1166 +#define SPH_PPC64_GCC SPH_DETECT_PPC64_GCC
  1167 +#endif
  1168 +
  1169 +#if SPH_LITTLE_ENDIAN && !defined SPH_LITTLE_FAST
  1170 +#define SPH_LITTLE_FAST 1
  1171 +#endif
  1172 +#if SPH_BIG_ENDIAN && !defined SPH_BIG_FAST
  1173 +#define SPH_BIG_FAST 1
  1174 +#endif
  1175 +
  1176 +#if defined SPH_UPTR && !(SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN)
  1177 +#error SPH_UPTR defined, but endianness is not known.
  1178 +#endif
  1179 +
  1180 +#if SPH_I386_GCC && !SPH_NO_ASM
  1181 +
  1182 +/*
  1183 + * On x86 32-bit, with gcc, we use the bswapl opcode to byte-swap 32-bit
  1184 + * values.
  1185 + */
  1186 +
  1187 +static SPH_INLINE sph_u32
  1188 +sph_bswap32(sph_u32 x)
  1189 +{
  1190 + __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
  1191 + return x;
  1192 +}
  1193 +
  1194 +#if SPH_64
  1195 +
  1196 +static SPH_INLINE sph_u64
  1197 +sph_bswap64(sph_u64 x)
  1198 +{
  1199 + return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
  1200 + | (sph_u64)sph_bswap32((sph_u32)(x >> 32));
  1201 +}
  1202 +
  1203 +#endif
  1204 +
  1205 +#elif SPH_AMD64_GCC && !SPH_NO_ASM
  1206 +
  1207 +/*
  1208 + * On x86 64-bit, with gcc, we use the bswapl and bswapq opcodes to
  1209 + * byte-swap 32-bit and 64-bit values, respectively.
  1210 + */
  1211 +
  1212 +static SPH_INLINE sph_u32
  1213 +sph_bswap32(sph_u32 x)
  1214 +{
  1215 + __asm__ __volatile__ ("bswapl %0" : "=r" (x) : "0" (x));
  1216 + return x;
  1217 +}
  1218 +
  1219 +#if SPH_64
  1220 +
  1221 +static SPH_INLINE sph_u64
  1222 +sph_bswap64(sph_u64 x)
  1223 +{
  1224 + __asm__ __volatile__ ("bswapq %0" : "=r" (x) : "0" (x));
  1225 + return x;
  1226 +}
  1227 +
  1228 +#endif
  1229 +
  1230 +/*
  1231 + * Disabled code. Apparently, Microsoft Visual C 2005 is smart enough
  1232 + * to generate proper opcodes for endianness swapping with the pure C
  1233 + * implementation below.
  1234 + *
  1235 +
  1236 +#elif SPH_I386_MSVC && !SPH_NO_ASM
  1237 +
  1238 +static __inline sph_u32 __declspec(naked) __fastcall
  1239 +sph_bswap32(sph_u32 x)
  1240 +{
  1241 + __asm {
  1242 + bswap ecx
  1243 + mov eax,ecx
  1244 + ret
  1245 + }
  1246 +}
  1247 +
  1248 +#if SPH_64
  1249 +
  1250 +static SPH_INLINE sph_u64
  1251 +sph_bswap64(sph_u64 x)
  1252 +{
  1253 + return ((sph_u64)sph_bswap32((sph_u32)x) << 32)
  1254 + | (sph_u64)sph_bswap32((sph_u32)(x >> 32));
  1255 +}
  1256 +
  1257 +#endif
  1258 +
  1259 + *
  1260 + * [end of disabled code]
  1261 + */
  1262 +
  1263 +#else
  1264 +
  1265 +static SPH_INLINE sph_u32
  1266 +sph_bswap32(sph_u32 x)
  1267 +{
  1268 + x = SPH_T32((x << 16) | (x >> 16));
  1269 + x = ((x & SPH_C32(0xFF00FF00)) >> 8)
  1270 + | ((x & SPH_C32(0x00FF00FF)) << 8);
  1271 + return x;
  1272 +}
  1273 +
  1274 +#if SPH_64
  1275 +
  1276 +/**
  1277 + * Byte-swap a 64-bit value.
  1278 + *
  1279 + * @param x the input value
  1280 + * @return the byte-swapped value
  1281 + */
  1282 +static SPH_INLINE sph_u64
  1283 +sph_bswap64(sph_u64 x)
  1284 +{
  1285 + x = SPH_T64((x << 32) | (x >> 32));
  1286 + x = ((x & SPH_C64(0xFFFF0000FFFF0000)) >> 16)
  1287 + | ((x & SPH_C64(0x0000FFFF0000FFFF)) << 16);
  1288 + x = ((x & SPH_C64(0xFF00FF00FF00FF00)) >> 8)
  1289 + | ((x & SPH_C64(0x00FF00FF00FF00FF)) << 8);
  1290 + return x;
  1291 +}
  1292 +
  1293 +#endif
  1294 +
  1295 +#endif
  1296 +
  1297 +#if SPH_SPARCV9_GCC && !SPH_NO_ASM
  1298 +
  1299 +/*
  1300 + * On UltraSPARC systems, native ordering is big-endian, but it is
  1301 + * possible to perform little-endian read accesses by specifying the
  1302 + * address space 0x88 (ASI_PRIMARY_LITTLE). Basically, either we use
  1303 + * the opcode "lda [%reg]0x88,%dst", where %reg is the register which
  1304 + * contains the source address and %dst is the destination register,
  1305 + * or we use "lda [%reg+imm]%asi,%dst", which uses the %asi register
  1306 + * to get the address space name. The latter format is better since it
  1307 + * combines an addition and the actual access in a single opcode; but
  1308 + * it requires the setting (and subsequent resetting) of %asi, which is
  1309 + * slow. Some operations (i.e. MD5 compression function) combine many
  1310 + * successive little-endian read accesses, which may share the same
  1311 + * %asi setting. The macros below contain the appropriate inline
  1312 + * assembly.
  1313 + */
  1314 +
  1315 +#define SPH_SPARCV9_SET_ASI \
  1316 + sph_u32 sph_sparcv9_asi; \
  1317 + __asm__ __volatile__ ( \
  1318 + "rd %%asi,%0\n\twr %%g0,0x88,%%asi" : "=r" (sph_sparcv9_asi));
  1319 +
  1320 +#define SPH_SPARCV9_RESET_ASI \
  1321 + __asm__ __volatile__ ("wr %%g0,%0,%%asi" : : "r" (sph_sparcv9_asi));
  1322 +
  1323 +#define SPH_SPARCV9_DEC32LE(base, idx) ({ \
  1324 + sph_u32 sph_sparcv9_tmp; \
  1325 + __asm__ __volatile__ ("lda [%1+" #idx "*4]%%asi,%0" \
  1326 + : "=r" (sph_sparcv9_tmp) : "r" (base)); \
  1327 + sph_sparcv9_tmp; \
  1328 + })
  1329 +
  1330 +#endif
  1331 +
  1332 +static SPH_INLINE void
  1333 +sph_enc16be(void *dst, unsigned val)
  1334 +{
  1335 + ((unsigned char *)dst)[0] = (val >> 8);
  1336 + ((unsigned char *)dst)[1] = val;
  1337 +}
  1338 +
  1339 +static SPH_INLINE unsigned
  1340 +sph_dec16be(const void *src)
  1341 +{
  1342 + return ((unsigned)(((const unsigned char *)src)[0]) << 8)
  1343 + | (unsigned)(((const unsigned char *)src)[1]);
  1344 +}
  1345 +
  1346 +static SPH_INLINE void
  1347 +sph_enc16le(void *dst, unsigned val)
  1348 +{
  1349 + ((unsigned char *)dst)[0] = val;
  1350 + ((unsigned char *)dst)[1] = val >> 8;
  1351 +}
  1352 +
  1353 +static SPH_INLINE unsigned
  1354 +sph_dec16le(const void *src)
  1355 +{
  1356 + return (unsigned)(((const unsigned char *)src)[0])
  1357 + | ((unsigned)(((const unsigned char *)src)[1]) << 8);
  1358 +}
  1359 +
  1360 +/**
  1361 + * Encode a 32-bit value into the provided buffer (big endian convention).
  1362 + *
  1363 + * @param dst the destination buffer
  1364 + * @param val the 32-bit value to encode
  1365 + */
  1366 +static SPH_INLINE void
  1367 +sph_enc32be(void *dst, sph_u32 val)
  1368 +{
  1369 +#if defined SPH_UPTR
  1370 +#if SPH_UNALIGNED
  1371 +#if SPH_LITTLE_ENDIAN
  1372 + val = sph_bswap32(val);
  1373 +#endif
  1374 + *(sph_u32 *)dst = val;
  1375 +#else
  1376 + if (((SPH_UPTR)dst & 3) == 0) {
  1377 +#if SPH_LITTLE_ENDIAN
  1378 + val = sph_bswap32(val);
  1379 +#endif
  1380 + *(sph_u32 *)dst = val;
  1381 + } else {
  1382 + ((unsigned char *)dst)[0] = (val >> 24);
  1383 + ((unsigned char *)dst)[1] = (val >> 16);
  1384 + ((unsigned char *)dst)[2] = (val >> 8);
  1385 + ((unsigned char *)dst)[3] = val;
  1386 + }
  1387 +#endif
  1388 +#else
  1389 + ((unsigned char *)dst)[0] = (val >> 24);
  1390 + ((unsigned char *)dst)[1] = (val >> 16);
  1391 + ((unsigned char *)dst)[2] = (val >> 8);
  1392 + ((unsigned char *)dst)[3] = val;
  1393 +#endif
  1394 +}
  1395 +
  1396 +/**
  1397 + * Encode a 32-bit value into the provided buffer (big endian convention).
  1398 + * The destination buffer must be properly aligned.
  1399 + *
  1400 + * @param dst the destination buffer (32-bit aligned)
  1401 + * @param val the value to encode
  1402 + */
  1403 +static SPH_INLINE void
  1404 +sph_enc32be_aligned(void *dst, sph_u32 val)
  1405 +{
  1406 +#if SPH_LITTLE_ENDIAN
  1407 + *(sph_u32 *)dst = sph_bswap32(val); /* swap to big-endian, then one word store */
  1408 +#elif SPH_BIG_ENDIAN
  1409 + *(sph_u32 *)dst = val; /* host order already matches output order */
  1410 +#else /* endianness unknown at compile time: portable byte stores */
  1411 + ((unsigned char *)dst)[0] = (val >> 24);
  1412 + ((unsigned char *)dst)[1] = (val >> 16);
  1413 + ((unsigned char *)dst)[2] = (val >> 8);
  1414 + ((unsigned char *)dst)[3] = val;
  1415 +#endif
  1416 +}
  1417 +
  1418 +/**
  1419 + * Decode a 32-bit value from the provided buffer (big endian convention).
  1420 + *
  1421 + * @param src the source buffer
  1422 + * @return the decoded value
  1423 + */
  1424 +static SPH_INLINE sph_u32
  1425 +sph_dec32be(const void *src)
  1426 +{
  1427 +#if defined SPH_UPTR /* pointer<->integer casts available: word-sized load is an option */
  1428 +#if SPH_UNALIGNED /* CPU tolerates unaligned word accesses */
  1429 +#if SPH_LITTLE_ENDIAN
  1430 + return sph_bswap32(*(const sph_u32 *)src); /* word load, then swap to host order */
  1431 +#else
  1432 + return *(const sph_u32 *)src;
  1433 +#endif
  1434 +#else
  1435 + if (((SPH_UPTR)src & 3) == 0) { /* src is 4-byte aligned: single word load */
  1436 +#if SPH_LITTLE_ENDIAN
  1437 + return sph_bswap32(*(const sph_u32 *)src);
  1438 +#else
  1439 + return *(const sph_u32 *)src;
  1440 +#endif
  1441 + } else { /* unaligned: assemble from bytes, most significant byte first */
  1442 + return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
  1443 + | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
  1444 + | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
  1445 + | (sph_u32)(((const unsigned char *)src)[3]);
  1446 + }
  1447 +#endif
  1448 +#else /* no SPH_UPTR: fully portable byte-by-byte load */
  1449 + return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
  1450 + | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
  1451 + | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
  1452 + | (sph_u32)(((const unsigned char *)src)[3]);
  1453 +#endif
  1454 +}
  1455 +
  1456 +/**
  1457 + * Decode a 32-bit value from the provided buffer (big endian convention).
  1458 + * The source buffer must be properly aligned.
  1459 + *
  1460 + * @param src the source buffer (32-bit aligned)
  1461 + * @return the decoded value
  1462 + */
  1463 +static SPH_INLINE sph_u32
  1464 +sph_dec32be_aligned(const void *src)
  1465 +{
  1466 +#if SPH_LITTLE_ENDIAN
  1467 + return sph_bswap32(*(const sph_u32 *)src); /* word load, then swap to host order */
  1468 +#elif SPH_BIG_ENDIAN
  1469 + return *(const sph_u32 *)src; /* host order already matches input order */
  1470 +#else /* endianness unknown at compile time: portable byte loads */
  1471 + return ((sph_u32)(((const unsigned char *)src)[0]) << 24)
  1472 + | ((sph_u32)(((const unsigned char *)src)[1]) << 16)
  1473 + | ((sph_u32)(((const unsigned char *)src)[2]) << 8)
  1474 + | (sph_u32)(((const unsigned char *)src)[3]);
  1475 +#endif
  1476 +}
  1477 +
  1478 +/**
  1479 + * Encode a 32-bit value into the provided buffer (little endian convention).
  1480 + *
  1481 + * @param dst the destination buffer
  1482 + * @param val the 32-bit value to encode
  1483 + */
  1484 +static SPH_INLINE void
  1485 +sph_enc32le(void *dst, sph_u32 val)
  1486 +{
  1487 +#if defined SPH_UPTR /* mirror of sph_enc32be with the byte order reversed */
  1488 +#if SPH_UNALIGNED
  1489 +#if SPH_BIG_ENDIAN
  1490 + val = sph_bswap32(val); /* host is big-endian; swap to little-endian byte order */
  1491 +#endif
  1492 + *(sph_u32 *)dst = val;
  1493 +#else
  1494 + if (((SPH_UPTR)dst & 3) == 0) { /* dst is 4-byte aligned: single word store */
  1495 +#if SPH_BIG_ENDIAN
  1496 + val = sph_bswap32(val);
  1497 +#endif
  1498 + *(sph_u32 *)dst = val;
  1499 + } else { /* unaligned: store byte by byte, least significant byte first */
  1500 + ((unsigned char *)dst)[0] = val;
  1501 + ((unsigned char *)dst)[1] = (val >> 8);
  1502 + ((unsigned char *)dst)[2] = (val >> 16);
  1503 + ((unsigned char *)dst)[3] = (val >> 24);
  1504 + }
  1505 +#endif
  1506 +#else /* no SPH_UPTR: fully portable byte-by-byte store */
  1507 + ((unsigned char *)dst)[0] = val;
  1508 + ((unsigned char *)dst)[1] = (val >> 8);
  1509 + ((unsigned char *)dst)[2] = (val >> 16);
  1510 + ((unsigned char *)dst)[3] = (val >> 24);
  1511 +#endif
  1512 +}
  1513 +
  1514 +/**
  1515 + * Encode a 32-bit value into the provided buffer (little endian convention).
  1516 + * The destination buffer must be properly aligned.
  1517 + *
  1518 + * @param dst the destination buffer (32-bit aligned)
  1519 + * @param val the value to encode
  1520 + */
  1521 +static SPH_INLINE void
  1522 +sph_enc32le_aligned(void *dst, sph_u32 val)
  1523 +{
  1524 +#if SPH_LITTLE_ENDIAN
  1525 + *(sph_u32 *)dst = val; /* host order already matches output order */
  1526 +#elif SPH_BIG_ENDIAN
  1527 + *(sph_u32 *)dst = sph_bswap32(val); /* swap to little-endian, then one word store */
  1528 +#else /* endianness unknown at compile time: portable byte stores */
  1529 + ((unsigned char *)dst)[0] = val;
  1530 + ((unsigned char *)dst)[1] = (val >> 8);
  1531 + ((unsigned char *)dst)[2] = (val >> 16);
  1532 + ((unsigned char *)dst)[3] = (val >> 24);
  1533 +#endif
  1534 +}
  1535 +
  1536 +/**
  1537 + * Decode a 32-bit value from the provided buffer (little endian convention).
  1538 + *
  1539 + * @param src the source buffer
  1540 + * @return the decoded value
  1541 + */
  1542 +static SPH_INLINE sph_u32
  1543 +sph_dec32le(const void *src)
  1544 +{
  1545 +#if defined SPH_UPTR /* pointer<->integer casts available: word-sized load is an option */
  1546 +#if SPH_UNALIGNED /* CPU tolerates unaligned word accesses */
  1547 +#if SPH_BIG_ENDIAN
  1548 + return sph_bswap32(*(const sph_u32 *)src); /* word load, then swap to host order */
  1549 +#else
  1550 + return *(const sph_u32 *)src;
  1551 +#endif
  1552 +#else
  1553 + if (((SPH_UPTR)src & 3) == 0) { /* src is 4-byte aligned: single word load */
  1554 +#if SPH_BIG_ENDIAN
  1555 +#if SPH_SPARCV9_GCC && !SPH_NO_ASM /* SPARC V9: byte-swapping load via alternate-space ASI 0x88 */
  1556 + sph_u32 tmp;
  1557 +
  1558 + /*
  1559 + * "__volatile__" is needed here because without it,
  1560 + * gcc-3.4.3 miscompiles the code and performs the
  1561 + * access before the test on the address, thus triggering
  1562 + * a bus error...
  1563 + */
  1564 + __asm__ __volatile__ (
  1565 + "lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
  1566 + return tmp;
  1567 +/*
  1568 + * On PowerPC, this turns out not to be worth the effort: the inline
  1569 + * assembly makes GCC optimizer uncomfortable, which tends to nullify
  1570 + * the decoding gains.
  1571 + *
  1572 + * For most hash functions, using this inline assembly trick changes
  1573 + * hashing speed by less than 5% and often _reduces_ it. The biggest
  1574 + * gains are for MD4 (+11%) and CubeHash (+30%). For all others, it is
  1575 + * less then 10%. The speed gain on CubeHash is probably due to the
  1576 + * chronic shortage of registers that CubeHash endures; for the other
  1577 + * functions, the generic code appears to be efficient enough already.
  1578 + *
  1579 +#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
  1580 + sph_u32 tmp;
  1581 +
  1582 + __asm__ __volatile__ (
  1583 + "lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
  1584 + return tmp;
  1585 + */
  1586 +#else
  1587 + return sph_bswap32(*(const sph_u32 *)src);
  1588 +#endif
  1589 +#else
  1590 + return *(const sph_u32 *)src;
  1591 +#endif
  1592 + } else { /* unaligned: assemble from bytes, least significant byte first */
  1593 + return (sph_u32)(((const unsigned char *)src)[0])
  1594 + | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
  1595 + | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
  1596 + | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
  1597 + }
  1598 +#endif
  1599 +#else /* no SPH_UPTR: fully portable byte-by-byte load */
  1600 + return (sph_u32)(((const unsigned char *)src)[0])
  1601 + | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
  1602 + | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
  1603 + | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
  1604 +#endif
  1605 +}
  1606 +
  1607 +/**
  1608 + * Decode a 32-bit value from the provided buffer (little endian convention).
  1609 + * The source buffer must be properly aligned.
  1610 + *
  1611 + * @param src the source buffer (32-bit aligned)
  1612 + * @return the decoded value
  1613 + */
  1614 +static SPH_INLINE sph_u32
  1615 +sph_dec32le_aligned(const void *src)
  1616 +{
  1617 +#if SPH_LITTLE_ENDIAN
  1618 + return *(const sph_u32 *)src; /* host order already matches input order */
  1619 +#elif SPH_BIG_ENDIAN
  1620 +#if SPH_SPARCV9_GCC && !SPH_NO_ASM /* SPARC V9: byte-swapping load via alternate-space ASI 0x88 */
  1621 + sph_u32 tmp;
  1622 +
  1623 + __asm__ __volatile__ ("lda [%1]0x88,%0" : "=r" (tmp) : "r" (src));
  1624 + return tmp;
  1625 +/*
  1626 + * Not worth it generally.
  1627 + *
  1628 +#elif (SPH_PPC32_GCC || SPH_PPC64_GCC) && !SPH_NO_ASM
  1629 + sph_u32 tmp;
  1630 +
  1631 + __asm__ __volatile__ ("lwbrx %0,0,%1" : "=r" (tmp) : "r" (src));
  1632 + return tmp;
  1633 + */
  1634 +#else
  1635 + return sph_bswap32(*(const sph_u32 *)src); /* word load, then swap to host order */
  1636 +#endif
  1637 +#else /* endianness unknown at compile time: portable byte loads */
  1638 + return (sph_u32)(((const unsigned char *)src)[0])
  1639 + | ((sph_u32)(((const unsigned char *)src)[1]) << 8)
  1640 + | ((sph_u32)(((const unsigned char *)src)[2]) << 16)
  1641 + | ((sph_u32)(((const unsigned char *)src)[3]) << 24);
  1642 +#endif
  1643 +}
  1644 +
  1645 +#if SPH_64
  1646 +
  1647 +/**
  1648 + * Encode a 64-bit value into the provided buffer (big endian convention).
  1649 + *
  1650 + * @param dst the destination buffer
  1651 + * @param val the 64-bit value to encode
  1652 + */
  1653 +static SPH_INLINE void
  1654 +sph_enc64be(void *dst, sph_u64 val)
  1655 +{
  1656 +#if defined SPH_UPTR /* same strategy as sph_enc32be, widened to 64 bits */
  1657 +#if SPH_UNALIGNED
  1658 +#if SPH_LITTLE_ENDIAN
  1659 + val = sph_bswap64(val); /* host is little-endian; swap to big-endian byte order */
  1660 +#endif
  1661 + *(sph_u64 *)dst = val;
  1662 +#else
  1663 + if (((SPH_UPTR)dst & 7) == 0) { /* dst is 8-byte aligned: single word store */
  1664 +#if SPH_LITTLE_ENDIAN
  1665 + val = sph_bswap64(val);
  1666 +#endif
  1667 + *(sph_u64 *)dst = val;
  1668 + } else { /* unaligned: store byte by byte, most significant byte first */
  1669 + ((unsigned char *)dst)[0] = (val >> 56);
  1670 + ((unsigned char *)dst)[1] = (val >> 48);
  1671 + ((unsigned char *)dst)[2] = (val >> 40);
  1672 + ((unsigned char *)dst)[3] = (val >> 32);
  1673 + ((unsigned char *)dst)[4] = (val >> 24);
  1674 + ((unsigned char *)dst)[5] = (val >> 16);
  1675 + ((unsigned char *)dst)[6] = (val >> 8);
  1676 + ((unsigned char *)dst)[7] = val;
  1677 + }
  1678 +#endif
  1679 +#else /* no SPH_UPTR: fully portable byte-by-byte store */
  1680 + ((unsigned char *)dst)[0] = (val >> 56);
  1681 + ((unsigned char *)dst)[1] = (val >> 48);
  1682 + ((unsigned char *)dst)[2] = (val >> 40);
  1683 + ((unsigned char *)dst)[3] = (val >> 32);
  1684 + ((unsigned char *)dst)[4] = (val >> 24);
  1685 + ((unsigned char *)dst)[5] = (val >> 16);
  1686 + ((unsigned char *)dst)[6] = (val >> 8);
  1687 + ((unsigned char *)dst)[7] = val;
  1688 +#endif
  1689 +}
  1690 +
  1691 +/**
  1692 + * Encode a 64-bit value into the provided buffer (big endian convention).
  1693 + * The destination buffer must be properly aligned.
  1694 + *
  1695 + * @param dst the destination buffer (64-bit aligned)
  1696 + * @param val the value to encode
  1697 + */
  1698 +static SPH_INLINE void
  1699 +sph_enc64be_aligned(void *dst, sph_u64 val)
  1700 +{
  1701 +#if SPH_LITTLE_ENDIAN
  1702 + *(sph_u64 *)dst = sph_bswap64(val); /* swap to big-endian, then one word store */
  1703 +#elif SPH_BIG_ENDIAN
  1704 + *(sph_u64 *)dst = val; /* host order already matches output order */
  1705 +#else /* endianness unknown at compile time: portable byte stores */
  1706 + ((unsigned char *)dst)[0] = (val >> 56);
  1707 + ((unsigned char *)dst)[1] = (val >> 48);
  1708 + ((unsigned char *)dst)[2] = (val >> 40);
  1709 + ((unsigned char *)dst)[3] = (val >> 32);
  1710 + ((unsigned char *)dst)[4] = (val >> 24);
  1711 + ((unsigned char *)dst)[5] = (val >> 16);
  1712 + ((unsigned char *)dst)[6] = (val >> 8);
  1713 + ((unsigned char *)dst)[7] = val;
  1714 +#endif
  1715 +}
  1716 +
  1717 +/**
  1718 + * Decode a 64-bit value from the provided buffer (big endian convention).
  1719 + *
  1720 + * @param src the source buffer
  1721 + * @return the decoded value
  1722 + */
  1723 +static SPH_INLINE sph_u64
  1724 +sph_dec64be(const void *src)
  1725 +{
  1726 +#if defined SPH_UPTR /* same strategy as sph_dec32be, widened to 64 bits */
  1727 +#if SPH_UNALIGNED
  1728 +#if SPH_LITTLE_ENDIAN
  1729 + return sph_bswap64(*(const sph_u64 *)src); /* word load, then swap to host order */
  1730 +#else
  1731 + return *(const sph_u64 *)src;
  1732 +#endif
  1733 +#else
  1734 + if (((SPH_UPTR)src & 7) == 0) { /* src is 8-byte aligned: single word load */
  1735 +#if SPH_LITTLE_ENDIAN
  1736 + return sph_bswap64(*(const sph_u64 *)src);
  1737 +#else
  1738 + return *(const sph_u64 *)src;
  1739 +#endif
  1740 + } else { /* unaligned: assemble from bytes, most significant byte first */
  1741 + return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
  1742 + | ((sph_u64)(((const unsigned char *)src)[1]) << 48)
  1743 + | ((sph_u64)(((const unsigned char *)src)[2]) << 40)
  1744 + | ((sph_u64)(((const unsigned char *)src)[3]) << 32)
  1745 + | ((sph_u64)(((const unsigned char *)src)[4]) << 24)
  1746 + | ((sph_u64)(((const unsigned char *)src)[5]) << 16)
  1747 + | ((sph_u64)(((const unsigned char *)src)[6]) << 8)
  1748 + | (sph_u64)(((const unsigned char *)src)[7]);
  1749 + }
  1750 +#endif
  1751 +#else /* no SPH_UPTR: fully portable byte-by-byte load */
  1752 + return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
  1753 + | ((sph_u64)(((const unsigned char *)src)[1]) << 48)
  1754 + | ((sph_u64)(((const unsigned char *)src)[2]) << 40)
  1755 + | ((sph_u64)(((const unsigned char *)src)[3]) << 32)
  1756 + | ((sph_u64)(((const unsigned char *)src)[4]) << 24)
  1757 + | ((sph_u64)(((const unsigned char *)src)[5]) << 16)
  1758 + | ((sph_u64)(((const unsigned char *)src)[6]) << 8)
  1759 + | (sph_u64)(((const unsigned char *)src)[7]);
  1760 +#endif
  1761 +}
  1762 +
  1763 +/**
  1764 + * Decode a 64-bit value from the provided buffer (big endian convention).
  1765 + * The source buffer must be properly aligned.
  1766 + *
  1767 + * @param src the source buffer (64-bit aligned)
  1768 + * @return the decoded value
  1769 + */
  1770 +static SPH_INLINE sph_u64
  1771 +sph_dec64be_aligned(const void *src)
  1772 +{
  1773 +#if SPH_LITTLE_ENDIAN
  1774 + return sph_bswap64(*(const sph_u64 *)src); /* word load, then swap to host order */
  1775 +#elif SPH_BIG_ENDIAN
  1776 + return *(const sph_u64 *)src; /* host order already matches input order */
  1777 +#else /* endianness unknown at compile time: portable byte loads */
  1778 + return ((sph_u64)(((const unsigned char *)src)[0]) << 56)
  1779 + | ((sph_u64)(((const unsigned char *)src)[1]) << 48)
  1780 + | ((sph_u64)(((const unsigned char *)src)[2]) << 40)
  1781 + | ((sph_u64)(((const unsigned char *)src)[3]) << 32)
  1782 + | ((sph_u64)(((const unsigned char *)src)[4]) << 24)
  1783 + | ((sph_u64)(((const unsigned char *)src)[5]) << 16)
  1784 + | ((sph_u64)(((const unsigned char *)src)[6]) << 8)
  1785 + | (sph_u64)(((const unsigned char *)src)[7]);
  1786 +#endif
  1787 +}
  1788 +
  1789 +/**
  1790 + * Encode a 64-bit value into the provided buffer (little endian convention).
  1791 + *
  1792 + * @param dst the destination buffer
  1793 + * @param val the 64-bit value to encode
  1794 + */
  1795 +static SPH_INLINE void
  1796 +sph_enc64le(void *dst, sph_u64 val)
  1797 +{
  1798 +#if defined SPH_UPTR /* mirror of sph_enc64be with the byte order reversed */
  1799 +#if SPH_UNALIGNED
  1800 +#if SPH_BIG_ENDIAN
  1801 + val = sph_bswap64(val); /* host is big-endian; swap to little-endian byte order */
  1802 +#endif
  1803 + *(sph_u64 *)dst = val;
  1804 +#else
  1805 + if (((SPH_UPTR)dst & 7) == 0) { /* dst is 8-byte aligned: single word store */
  1806 +#if SPH_BIG_ENDIAN
  1807 + val = sph_bswap64(val);
  1808 +#endif
  1809 + *(sph_u64 *)dst = val;
  1810 + } else { /* unaligned: store byte by byte, least significant byte first */
  1811 + ((unsigned char *)dst)[0] = val;
  1812 + ((unsigned char *)dst)[1] = (val >> 8);
  1813 + ((unsigned char *)dst)[2] = (val >> 16);
  1814 + ((unsigned char *)dst)[3] = (val >> 24);
  1815 + ((unsigned char *)dst)[4] = (val >> 32);
  1816 + ((unsigned char *)dst)[5] = (val >> 40);
  1817 + ((unsigned char *)dst)[6] = (val >> 48);
  1818 + ((unsigned char *)dst)[7] = (val >> 56);
  1819 + }
  1820 +#endif
  1821 +#else /* no SPH_UPTR: fully portable byte-by-byte store */
  1822 + ((unsigned char *)dst)[0] = val;
  1823 + ((unsigned char *)dst)[1] = (val >> 8);
  1824 + ((unsigned char *)dst)[2] = (val >> 16);
  1825 + ((unsigned char *)dst)[3] = (val >> 24);
  1826 + ((unsigned char *)dst)[4] = (val >> 32);
  1827 + ((unsigned char *)dst)[5] = (val >> 40);
  1828 + ((unsigned char *)dst)[6] = (val >> 48);
  1829 + ((unsigned char *)dst)[7] = (val >> 56);
  1830 +#endif
  1831 +}
  1832 +
  1833 +/**
  1834 + * Encode a 64-bit value into the provided buffer (little endian convention).
  1835 + * The destination buffer must be properly aligned.
  1836 + *
  1837 + * @param dst the destination buffer (64-bit aligned)
  1838 + * @param val the value to encode
  1839 + */
  1840 +static SPH_INLINE void
  1841 +sph_enc64le_aligned(void *dst, sph_u64 val)
  1842 +{
  1843 +#if SPH_LITTLE_ENDIAN
  1844 + *(sph_u64 *)dst = val; /* host order already matches output order */
  1845 +#elif SPH_BIG_ENDIAN
  1846 + *(sph_u64 *)dst = sph_bswap64(val); /* swap to little-endian, then one word store */
  1847 +#else /* endianness unknown at compile time: portable byte stores */
  1848 + ((unsigned char *)dst)[0] = val;
  1849 + ((unsigned char *)dst)[1] = (val >> 8);
  1850 + ((unsigned char *)dst)[2] = (val >> 16);
  1851 + ((unsigned char *)dst)[3] = (val >> 24);
  1852 + ((unsigned char *)dst)[4] = (val >> 32);
  1853 + ((unsigned char *)dst)[5] = (val >> 40);
  1854 + ((unsigned char *)dst)[6] = (val >> 48);
  1855 + ((unsigned char *)dst)[7] = (val >> 56);
  1856 +#endif
  1857 +}
  1858 +
  1859 +/**
  1860 + * Decode a 64-bit value from the provided buffer (little endian convention).
  1861 + *
  1862 + * @param src the source buffer
  1863 + * @return the decoded value
  1864 + */
  1865 +static SPH_INLINE sph_u64
  1866 +sph_dec64le(const void *src)
  1867 +{
  1868 +#if defined SPH_UPTR /* same strategy as sph_dec32le, widened to 64 bits */
  1869 +#if SPH_UNALIGNED
  1870 +#if SPH_BIG_ENDIAN
  1871 + return sph_bswap64(*(const sph_u64 *)src); /* word load, then swap to host order */
  1872 +#else
  1873 + return *(const sph_u64 *)src;
  1874 +#endif
  1875 +#else
  1876 + if (((SPH_UPTR)src & 7) == 0) { /* src is 8-byte aligned: single word load */
  1877 +#if SPH_BIG_ENDIAN
  1878 +#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM /* SPARC V9 64-bit: byte-swapping load via alternate-space ASI 0x88 */
  1879 + sph_u64 tmp;
  1880 +
  1881 + __asm__ __volatile__ (
  1882 + "ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
  1883 + return tmp;
  1884 +/*
  1885 + * Not worth it generally.
  1886 + *
  1887 +#elif SPH_PPC32_GCC && !SPH_NO_ASM
  1888 + return (sph_u64)sph_dec32le_aligned(src)
  1889 + | ((sph_u64)sph_dec32le_aligned(
  1890 + (const char *)src + 4) << 32);
  1891 +#elif SPH_PPC64_GCC && !SPH_NO_ASM
  1892 + sph_u64 tmp;
  1893 +
  1894 + __asm__ __volatile__ (
  1895 + "ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
  1896 + return tmp;
  1897 + */
  1898 +#else
  1899 + return sph_bswap64(*(const sph_u64 *)src);
  1900 +#endif
  1901 +#else
  1902 + return *(const sph_u64 *)src;
  1903 +#endif
  1904 + } else { /* unaligned: assemble from bytes, least significant byte first */
  1905 + return (sph_u64)(((const unsigned char *)src)[0])
  1906 + | ((sph_u64)(((const unsigned char *)src)[1]) << 8)
  1907 + | ((sph_u64)(((const unsigned char *)src)[2]) << 16)
  1908 + | ((sph_u64)(((const unsigned char *)src)[3]) << 24)
  1909 + | ((sph_u64)(((const unsigned char *)src)[4]) << 32)
  1910 + | ((sph_u64)(((const unsigned char *)src)[5]) << 40)
  1911 + | ((sph_u64)(((const unsigned char *)src)[6]) << 48)
  1912 + | ((sph_u64)(((const unsigned char *)src)[7]) << 56);
  1913 + }
  1914 +#endif
  1915 +#else /* no SPH_UPTR: fully portable byte-by-byte load */
  1916 + return (sph_u64)(((const unsigned char *)src)[0])
  1917 + | ((sph_u64)(((const unsigned char *)src)[1]) << 8)
  1918 + | ((sph_u64)(((const unsigned char *)src)[2]) << 16)
  1919 + | ((sph_u64)(((const unsigned char *)src)[3]) << 24)
  1920 + | ((sph_u64)(((const unsigned char *)src)[4]) << 32)
  1921 + | ((sph_u64)(((const unsigned char *)src)[5]) << 40)
  1922 + | ((sph_u64)(((const unsigned char *)src)[6]) << 48)
  1923 + | ((sph_u64)(((const unsigned char *)src)[7]) << 56);
  1924 +#endif
  1925 +}
  1926 +
  1927 +/**
  1928 + * Decode a 64-bit value from the provided buffer (little endian convention).
  1929 + * The source buffer must be properly aligned.
  1930 + *
  1931 + * @param src the source buffer (64-bit aligned)
  1932 + * @return the decoded value
  1933 + */
  1934 +static SPH_INLINE sph_u64
  1935 +sph_dec64le_aligned(const void *src)
  1936 +{
  1937 +#if SPH_LITTLE_ENDIAN
  1938 + return *(const sph_u64 *)src; /* host order already matches input order */
  1939 +#elif SPH_BIG_ENDIAN
  1940 +#if SPH_SPARCV9_GCC_64 && !SPH_NO_ASM /* SPARC V9 64-bit: byte-swapping load via alternate-space ASI 0x88 */
  1941 + sph_u64 tmp;
  1942 +
  1943 + __asm__ __volatile__ ("ldxa [%1]0x88,%0" : "=r" (tmp) : "r" (src));
  1944 + return tmp;
  1945 +/*
  1946 + * Not worth it generally.
  1947 + *
  1948 +#elif SPH_PPC32_GCC && !SPH_NO_ASM
  1949 + return (sph_u64)sph_dec32le_aligned(src)
  1950 + | ((sph_u64)sph_dec32le_aligned((const char *)src + 4) << 32);
  1951 +#elif SPH_PPC64_GCC && !SPH_NO_ASM
  1952 + sph_u64 tmp;
  1953 +
  1954 + __asm__ __volatile__ ("ldbrx %0,0,%1" : "=r" (tmp) : "r" (src));
  1955 + return tmp;
  1956 + */
  1957 +#else
  1958 + return sph_bswap64(*(const sph_u64 *)src); /* word load, then swap to host order */
  1959 +#endif
  1960 +#else /* endianness unknown at compile time: portable byte loads */
  1961 + return (sph_u64)(((const unsigned char *)src)[0])
  1962 + | ((sph_u64)(((const unsigned char *)src)[1]) << 8)
  1963 + | ((sph_u64)(((const unsigned char *)src)[2]) << 16)
  1964 + | ((sph_u64)(((const unsigned char *)src)[3]) << 24)
  1965 + | ((sph_u64)(((const unsigned char *)src)[4]) << 32)
  1966 + | ((sph_u64)(((const unsigned char *)src)[5]) << 40)
  1967 + | ((sph_u64)(((const unsigned char *)src)[6]) << 48)
  1968 + | ((sph_u64)(((const unsigned char *)src)[7]) << 56);
  1969 +#endif
  1970 +}
  1971 +
  1972 +#endif
  1973 +
  1974 +#endif /* Doxygen excluded block */
  1975 +
  1976 +#endif
... ...