diff --git a/Makefile.am b/Makefile.am index 0d389dd33..7933a2a13 100644 --- a/Makefile.am +++ b/Makefile.am @@ -32,9 +32,11 @@ cpuminer_SOURCES = \ sha3/sph_cubehash.c \ sha3/sph_simd.c \ sha3/sph_echo.c \ + sha3/sph_fugue.c \ sha3/sph_hamsi.c \ sha3/sph_haval.c \ - sha3/sph_fugue.c \ + sha3/sph_panama.c \ + sha3/sph_radiogatun.c \ sha3/sph_ripemd.c \ sha3/sph_sha2.c \ sha3/sph_sha2big.c \ @@ -107,6 +109,7 @@ cpuminer_SOURCES = \ algo/x15.c \ algo/x16r.c \ algo/x16s.c \ + algo/x20r.c \ algo/x17.c \ algo/xevan.c \ algo/yescrypt.c \ diff --git a/algo/x20r.c b/algo/x20r.c new file mode 100644 index 000000000..fc5c10468 --- /dev/null +++ b/algo/x20r.c @@ -0,0 +1,258 @@ +#include "miner.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum Algo { + BLAKE = 0, + BMW, + GROESTL, + JH, + KECCAK, + SKEIN, + LUFFA, + CUBEHASH, + SHAVITE, + SIMD, + ECHO, + HAMSI, + FUGUE, + SHABAL, + WHIRLPOOL, + SHA512, + HAVAL, + GOST, + RADIOGATUN, + PANAMA, + HASH_FUNC_COUNT +}; + +static __thread uint32_t s_ntime = UINT32_MAX; +static __thread char hashOrder[HASH_FUNC_COUNT + 1] = { 0 }; + +static void getAlgoString(const uint8_t* prevblock, char *output) +{ + char *sptr = output; + + for (int j = 0; j < HASH_FUNC_COUNT; j++) { + char b = (19 - j) >> 1; // 16 ascii hex chars, reversed + uint8_t algoDigit = (j & 1) ? prevblock[b] & 0xF : prevblock[b] >> 4; + if (algoDigit >= 10) + sprintf(sptr, "%c", 'A' + (algoDigit - 10)); + else + sprintf(sptr, "%u", (uint32_t) algoDigit); + sptr++; + } + *sptr = '\0'; +} + +void x20r_hash(void* output, const void* input) +{ + uint32_t _ALIGN(128) hash[64/4]; + + sph_blake512_context ctx_blake; + sph_bmw512_context ctx_bmw; + sph_groestl512_context ctx_groestl; + sph_skein512_context ctx_skein; + sph_jh512_context ctx_jh; + sph_keccak512_context ctx_keccak; + sph_luffa512_context ctx_luffa; + sph_cubehash512_context ctx_cubehash; + sph_shavite512_context ctx_shavite; + sph_simd512_context ctx_simd; + sph_echo512_context ctx_echo; + sph_hamsi512_context ctx_hamsi; + sph_fugue512_context ctx_fugue; + sph_shabal512_context ctx_shabal; + sph_whirlpool_context ctx_whirlpool; + sph_sha512_context ctx_sha512; + sph_haval256_5_context ctx_haval; + sph_gost512_context ctx_gost; + sph_radiogatun64_context ctx_radiogatun; + sph_panama_context ctx_panama; + + void *in = (void*) input; + int size = 80; + + if (s_ntime == UINT32_MAX) { + const uint8_t* in8 = (uint8_t*) input; + getAlgoString(&in8[4], hashOrder); + } + + for (int i = 0; i < 20; i++) + { + const char elem = hashOrder[i]; + const uint8_t algo = elem >= 'A' ? elem - 'A' + 10 : elem - '0'; + + switch (algo) { + case BLAKE: + sph_blake512_init(&ctx_blake); + sph_blake512(&ctx_blake, in, size); + sph_blake512_close(&ctx_blake, hash); + break; + case BMW: + sph_bmw512_init(&ctx_bmw); + sph_bmw512(&ctx_bmw, in, size); + sph_bmw512_close(&ctx_bmw, hash); + break; + case GROESTL: + sph_groestl512_init(&ctx_groestl); + sph_groestl512(&ctx_groestl, in, size); + sph_groestl512_close(&ctx_groestl, hash); + break; + case SKEIN: + sph_skein512_init(&ctx_skein); + sph_skein512(&ctx_skein, in, size); + sph_skein512_close(&ctx_skein, hash); + break; + case JH: + sph_jh512_init(&ctx_jh); + sph_jh512(&ctx_jh, in, size); + sph_jh512_close(&ctx_jh, hash); + break; + case KECCAK: + sph_keccak512_init(&ctx_keccak); + sph_keccak512(&ctx_keccak, in, size); + sph_keccak512_close(&ctx_keccak, hash); + break; + case LUFFA: + sph_luffa512_init(&ctx_luffa); + sph_luffa512(&ctx_luffa, in, size); + sph_luffa512_close(&ctx_luffa, hash); + break; + case CUBEHASH: + sph_cubehash512_init(&ctx_cubehash); + sph_cubehash512(&ctx_cubehash, in, size); + sph_cubehash512_close(&ctx_cubehash, hash); + break; + case SHAVITE: + sph_shavite512_init(&ctx_shavite); + sph_shavite512(&ctx_shavite, in, size); + sph_shavite512_close(&ctx_shavite, hash); + break; + case SIMD: + sph_simd512_init(&ctx_simd); + sph_simd512(&ctx_simd, in, size); + sph_simd512_close(&ctx_simd, hash); + break; + case ECHO: + sph_echo512_init(&ctx_echo); + sph_echo512(&ctx_echo, in, size); + sph_echo512_close(&ctx_echo, hash); + break; + case HAMSI: + sph_hamsi512_init(&ctx_hamsi); + sph_hamsi512(&ctx_hamsi, in, size); + sph_hamsi512_close(&ctx_hamsi, hash); + break; + case FUGUE: + sph_fugue512_init(&ctx_fugue); + sph_fugue512(&ctx_fugue, in, size); + sph_fugue512_close(&ctx_fugue, hash); + break; + case SHABAL: + sph_shabal512_init(&ctx_shabal); + sph_shabal512(&ctx_shabal, in, size); + sph_shabal512_close(&ctx_shabal, hash); + break; + case WHIRLPOOL: + sph_whirlpool_init(&ctx_whirlpool); + sph_whirlpool(&ctx_whirlpool, in, size); + sph_whirlpool_close(&ctx_whirlpool, hash); + break; + case SHA512: + sph_sha512_init(&ctx_sha512); + sph_sha512(&ctx_sha512,(const void*) in, size); + sph_sha512_close(&ctx_sha512,(void*) hash); + break; + case HAVAL: + sph_haval256_5_init(&ctx_haval); + sph_haval256_5(&ctx_haval, in, size); + sph_haval256_5_close(&ctx_haval, hash); + break; + case GOST: + sph_gost512_init(&ctx_gost); + sph_gost512(&ctx_gost, in, size); + sph_gost512_close(&ctx_gost, hash); + break; + case RADIOGATUN: + sph_radiogatun64_init(&ctx_radiogatun); + sph_radiogatun64(&ctx_radiogatun, in, size); + sph_radiogatun64_close(&ctx_radiogatun, hash); + break; + case PANAMA: + sph_panama_init(&ctx_panama); + sph_panama(&ctx_panama, in, size); + sph_panama_close(&ctx_panama, hash); + break; + } + in = (void*) hash; + size = 64; + } + memcpy(output, hash, 32); +} + +int scanhash_x20r(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done) +{ + uint32_t _ALIGN(128) hash32[8]; + uint32_t _ALIGN(128) endiandata[20]; + uint32_t *pdata = work->data; + uint32_t *ptarget = work->target; + const uint32_t Htarg = ptarget[7]; + const uint32_t first_nonce = pdata[19]; + uint32_t nonce = first_nonce; + volatile uint8_t *restart = &(work_restart[thr_id].restart); + + for (int k=0; k < 19; k++) + be32enc(&endiandata[k], pdata[k]); + + if (s_ntime != pdata[17]) { + uint32_t ntime = swab32(pdata[17]); + getAlgoString((const char*) (&endiandata[1]), hashOrder); + s_ntime = ntime; + if (opt_debug && !thr_id) applog(LOG_DEBUG, "hash order %s (%08x)", hashOrder, ntime); + } + + if (opt_benchmark) + ptarget[7] = 0x0cff; + + do { + be32enc(&endiandata[19], nonce); + x20r_hash(hash32, endiandata); + + if (hash32[7] <= Htarg && fulltest(hash32, ptarget)) { + work_set_target_ratio(work, hash32); + pdata[19] = nonce; + *hashes_done = pdata[19] - first_nonce; + return 1; + } + nonce++; + + } while (nonce < max_nonce && !(*restart)); + + pdata[19] = nonce; + *hashes_done = pdata[19] - first_nonce + 1; + return 0; +} diff --git a/cpu-miner.c b/cpu-miner.c index 8c538e988..30a3eabb5 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -134,6 +134,7 @@ enum algos { ALGO_X16R, ALGO_X16S, ALGO_X17, /* X17 */ + ALGO_X20R, ALGO_XEVAN, ALGO_YESCRYPT, ALGO_ZR5, @@ -198,6 +199,7 @@ static const char *algo_names[] = { "x16r", "x16s", "x17", + "x20r", "xevan", "yescrypt", "zr5", @@ -359,6 +361,7 @@ Options:\n\ x16r X16R (Raven)\n\ x16s X16S (Pigeon)\n\ x17 X17\n\ + x20r X20R\n\ xevan Xevan (BitSend)\n\ yescrypt Yescrypt\n\ zr5 ZR5\n\ @@ -1853,6 +1856,7 @@ static void stratum_gen_work(struct stratum_ctx *sctx, struct work *work) case ALGO_XEVAN: case ALGO_X16R: case ALGO_X16S: + case ALGO_X20R: work_set_target(work, sctx->job.diff / (256.0 * opt_diff_factor)); break; case ALGO_KECCAK: @@ -2205,6 +2209,7 @@ static void *miner_thread(void *userdata) case ALGO_X16R: case ALGO_X16S: case ALGO_X17: + case ALGO_X20R: case ALGO_ZR5: max64 = 0x1ffff; break; @@ -2404,6 +2409,9 @@ static void *miner_thread(void *userdata) case ALGO_X16R: rc = scanhash_x16r(thr_id, &work, max_nonce, &hashes_done); break; + case ALGO_X20R: + rc = scanhash_x20r(thr_id, &work, max_nonce, &hashes_done); + break; case ALGO_X16S: rc = scanhash_x16s(thr_id, &work, max_nonce, &hashes_done); break; diff --git a/cpuminer.vcxproj b/cpuminer.vcxproj index 0a4d4ec71..12005153f 100644 --- a/cpuminer.vcxproj +++ b/cpuminer.vcxproj @@ -289,6 +289,8 @@ true + + @@ -358,6 +360,8 @@ + + diff --git a/cpuminer.vcxproj.filters b/cpuminer.vcxproj.filters index dd46391b7..22d53c034 100644 --- a/cpuminer.vcxproj.filters +++ b/cpuminer.vcxproj.filters @@ -46,6 +46,12 @@ sph + + sph + + + sph + sph @@ -303,6 +309,9 @@ algo + + algo + algo @@ -395,6 +404,12 @@ sph + + sph + + + sph + sph diff --git a/miner.h b/miner.h index 9ecbb88a6..897ec7ba5 100644 --- a/miner.h +++ b/miner.h @@ -256,6 +256,7 @@ int scanhash_x15(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *ha int scanhash_x16r(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done); int scanhash_x16s(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done); int scanhash_x17(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done); +int scanhash_x20r(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done); int scanhash_xevan(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done); int scanhash_yescrypt(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done); int scanhash_zr5(int thr_id, struct work *work, uint32_t max_nonce, uint64_t *hashes_done); @@ -550,6 +551,7 @@ void x15hash(void *output, const void *input); void x16r_hash(void *output, const void *input); void x16s_hash(void *output, const void *input); void x17hash(void *output, const void *input); +void x20r_hash(void *output, const void *input); void zr5hash(void *output, const void *input); void yescrypthash(void *output, const void *input); void zr5hash_pok(void *output, uint32_t *pdata); diff --git a/sha3/sph_panama.c b/sha3/sph_panama.c new file mode 100644 index 000000000..f3c27c77a --- /dev/null +++ b/sha3/sph_panama.c @@ -0,0 +1,334 @@ +/* $Id: panama.c 216 2010-06-08 09:46:57Z tp $ */ +/* + * PANAMA implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_panama.h" + +#define LVAR17(b) sph_u32 \ + b ## 0, b ## 1, b ## 2, b ## 3, b ## 4, b ## 5, \ + b ## 6, b ## 7, b ## 8, b ## 9, b ## 10, b ## 11, \ + b ## 12, b ## 13, b ## 14, b ## 15, b ## 16; + +#define LVARS \ + LVAR17(a) \ + LVAR17(g) \ + LVAR17(p) \ + LVAR17(t) + +#define M17(macro) do { \ + macro( 0, 1, 2, 4); \ + macro( 1, 2, 3, 5); \ + macro( 2, 3, 4, 6); \ + macro( 3, 4, 5, 7); \ + macro( 4, 5, 6, 8); \ + macro( 5, 6, 7, 9); \ + macro( 6, 7, 8, 10); \ + macro( 7, 8, 9, 11); \ + macro( 8, 9, 10, 12); \ + macro( 9, 10, 11, 13); \ + macro(10, 11, 12, 14); \ + macro(11, 12, 13, 15); \ + macro(12, 13, 14, 16); \ + macro(13, 14, 15, 0); \ + macro(14, 15, 16, 1); \ + macro(15, 16, 0, 2); \ + macro(16, 0, 1, 3); \ + } while (0) + +#define BUPDATE1(n0, n2) do { \ + sc->buffer[ptr24][n0] ^= sc->buffer[ptr31][n2]; \ + sc->buffer[ptr31][n2] ^= INW1(n2); \ + } while (0) + +#define BUPDATE do { \ + BUPDATE1(0, 2); \ + BUPDATE1(1, 3); \ + BUPDATE1(2, 4); \ + BUPDATE1(3, 5); \ + BUPDATE1(4, 6); \ + BUPDATE1(5, 7); \ + BUPDATE1(6, 0); \ + BUPDATE1(7, 1); \ + } while (0) + +#define RSTATE(n0, n1, n2, n4) (a ## n0 = sc->state[n0]) + +#define WSTATE(n0, n1, n2, n4) (sc->state[n0] = a ## n0) + +#define GAMMA(n0, n1, n2, n4) \ + (g ## n0 = a ## n0 ^ (a ## n1 | SPH_T32(~a ## n2))) + +#define PI_ALL do { \ + p0 = g0; \ + p1 = SPH_ROTL32( g7, 1); \ + p2 = SPH_ROTL32(g14, 3); \ + p3 = SPH_ROTL32( g4, 6); \ + p4 = SPH_ROTL32(g11, 10); \ + p5 = SPH_ROTL32( g1, 15); \ + p6 = SPH_ROTL32( g8, 21); \ + p7 = SPH_ROTL32(g15, 28); \ + p8 = SPH_ROTL32( g5, 4); \ + p9 = SPH_ROTL32(g12, 13); \ + p10 = SPH_ROTL32( g2, 23); \ + p11 = SPH_ROTL32( g9, 2); \ + p12 = SPH_ROTL32(g16, 14); \ + p13 = SPH_ROTL32( g6, 27); \ + p14 = SPH_ROTL32(g13, 9); \ + p15 = SPH_ROTL32( g3, 24); \ + p16 = SPH_ROTL32(g10, 8); \ + } while (0) + +#define THETA(n0, n1, n2, n4) \ + (t ## n0 = p ## n0 ^ p ## n1 ^ p ## n4) + +#define SIGMA_ALL do { \ + a0 = t0 ^ 1; \ + a1 = t1 ^ INW2(0); \ + a2 = t2 ^ INW2(1); \ + a3 = t3 ^ INW2(2); \ + a4 = t4 ^ INW2(3); \ + a5 = t5 ^ INW2(4); \ + a6 = t6 ^ INW2(5); \ + a7 = t7 ^ INW2(6); \ + a8 = t8 ^ INW2(7); \ + a9 = t9 ^ sc->buffer[ptr16][0]; \ + a10 = t10 ^ sc->buffer[ptr16][1]; \ + a11 = t11 ^ sc->buffer[ptr16][2]; \ + a12 = t12 ^ sc->buffer[ptr16][3]; \ + a13 = t13 ^ sc->buffer[ptr16][4]; \ + a14 = t14 ^ sc->buffer[ptr16][5]; \ + a15 = t15 ^ sc->buffer[ptr16][6]; \ + a16 = t16 ^ sc->buffer[ptr16][7]; \ + } while (0) + +#define PANAMA_STEP do { \ + unsigned ptr16, ptr24, ptr31; \ + \ + ptr24 = (ptr0 - 8) & 31; \ + ptr31 = (ptr0 - 1) & 31; \ + BUPDATE; \ + M17(GAMMA); \ + PI_ALL; \ + M17(THETA); \ + ptr16 = ptr0 ^ 16; \ + SIGMA_ALL; \ + ptr0 = ptr31; \ + } while (0) + +/* + * These macros are used to compute + */ +#define INC0 1 +#define INC1 2 +#define INC2 3 +#define INC3 4 +#define INC4 5 +#define INC5 6 +#define INC6 7 +#define INC7 8 + +/* + * Push data by blocks of 32 bytes. "pbuf" must be 32-bit aligned. Each + * iteration processes 32 data bytes; "num" contains the number of + * iterations. + */ +static void +panama_push(sph_panama_context *sc, const unsigned char *pbuf, size_t num) +{ + LVARS + unsigned ptr0; +#if SPH_LITTLE_FAST +#define INW1(i) sph_dec32le_aligned(pbuf + 4 * (i)) +#else + sph_u32 X_var[8]; +#define INW1(i) X_var[i] +#endif +#define INW2(i) INW1(i) + + M17(RSTATE); + ptr0 = sc->buffer_ptr; + while (num -- > 0) { +#if !SPH_LITTLE_FAST + int i; + + for (i = 0; i < 8; i ++) + X_var[i] = sph_dec32le_aligned(pbuf + 4 * (i)); +#endif + PANAMA_STEP; + pbuf = (const unsigned char *)pbuf + 32; + } + M17(WSTATE); + sc->buffer_ptr = ptr0; + +#undef INW1 +#undef INW2 +} + +/* + * Perform the "pull" operation repeatedly ("num" times). The hash output + * will be extracted from the state afterwards. + */ +static void +panama_pull(sph_panama_context *sc, unsigned num) +{ + LVARS + unsigned ptr0; +#define INW1(i) INW_H1(INC ## i) +#define INW_H1(i) INW_H2(i) +#define INW_H2(i) a ## i +#define INW2(i) sc->buffer[ptr4][i] + + M17(RSTATE); + ptr0 = sc->buffer_ptr; + while (num -- > 0) { + unsigned ptr4; + + ptr4 = (ptr0 + 4) & 31; + PANAMA_STEP; + } + M17(WSTATE); + +#undef INW1 +#undef INW_H1 +#undef INW_H2 +#undef INW2 +} + +/* see sph_panama.h */ +void +sph_panama_init(void *cc) +{ + sph_panama_context *sc; + + sc = cc; + /* + * This is not completely conformant, but "it will work + * everywhere". Initial state consists of zeroes everywhere. + * Conceptually, the sph_u32 type may have padding bits which + * must not be set to 0; but such an architecture remains to + * be seen. + */ + sc->data_ptr = 0; + memset(sc->buffer, 0, sizeof sc->buffer); + sc->buffer_ptr = 0; + memset(sc->state, 0, sizeof sc->state); +} + +#ifdef SPH_UPTR +static void +panama_short(void *cc, const void *data, size_t len) +#else +void +sph_panama(void *cc, const void *data, size_t len) +#endif +{ + sph_panama_context *sc; + unsigned current; + + sc = cc; + current = sc->data_ptr; + while (len > 0) { + unsigned clen; + + clen = (sizeof sc->data) - current; + if (clen > len) + clen = len; + memcpy(sc->data + current, data, clen); + data = (const unsigned char *)data + clen; + len -= clen; + current += clen; + if (current == sizeof sc->data) { + current = 0; + panama_push(sc, sc->data, 1); + } + } + sc->data_ptr = current; +} + +#ifdef SPH_UPTR +/* see sph_panama.h */ +void +sph_panama(void *cc, const void *data, size_t len) +{ + sph_panama_context *sc; + unsigned current; + size_t rlen; + + if (len < (2 * sizeof sc->data)) { + panama_short(cc, data, len); + return; + } + sc = cc; + current = sc->data_ptr; + if (current > 0) { + unsigned t; + + t = (sizeof sc->data) - current; + panama_short(sc, data, t); + data = (const unsigned char *)data + t; + len -= t; + } +#if !SPH_UNALIGNED + if (((SPH_UPTR)data & 3) != 0) { + panama_short(sc, data, len); + return; + } +#endif + panama_push(sc, data, len >> 5); + rlen = len & 31; + if (rlen > 0) + memcpy(sc->data, + (const unsigned char *)data + len - rlen, rlen); + sc->data_ptr = rlen; +} +#endif + +/* see sph_panama.h */ +void +sph_panama_close(void *cc, void *dst) +{ + sph_panama_context *sc; + unsigned current; + int i; + + sc = cc; + current = sc->data_ptr; + sc->data[current ++] = 0x01; + memset(sc->data + current, 0, (sizeof sc->data) - current); + panama_push(sc, sc->data, 1); + panama_pull(sc, 32); + for (i = 0; i < 8; i ++) + sph_enc32le((unsigned char *)dst + 4 * i, sc->state[i + 9]); + sph_panama_init(sc); +} diff --git a/sha3/sph_panama.h b/sha3/sph_panama.h new file mode 100644 index 000000000..e4dc1073a --- /dev/null +++ b/sha3/sph_panama.h @@ -0,0 +1,118 @@ +/* $Id: sph_panama.h 154 2010-04-26 17:00:24Z tp $ */ +/** + * PANAMA interface. + * + * PANAMA has been published in: J. Daemen and C. Clapp, "Fast Hashing + * and Stream Encryption with PANAMA", Fast Software Encryption - + * FSE'98, LNCS 1372, Springer (1998), pp. 60--74. + * + * PANAMA is not fully defined with regards to endianness and related + * topics. This implementation follows strict little-endian conventions: + *
    + *
  • Each 32-byte input block is split into eight 32-bit words, the + * first (leftmost) word being numbered 0.
  • + *
  • Each such 32-bit word is decoded from memory in little-endian + * convention.
  • + *
  • The additional padding bit equal to "1" is added by considering + * the least significant bit in a byte to come first; practically, this + * means that a single byte of value 0x01 is appended to the (byte-oriented) + * message, and then 0 to 31 bytes of value 0x00.
  • + *
  • The output consists of eight 32-bit words; the word numbered 0 is + * written first (in leftmost position) and it is encoded in little-endian + * convention. + *
+ * With these conventions, PANAMA is sometimes known as "PANAMA-LE". The + * PANAMA reference implementation uses our conventions for input, but + * prescribes no convention for output. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_panama.h + * @author Thomas Pornin + */ + +#ifndef SPH_PANAMA_H__ +#define SPH_PANAMA_H__ + +#include +#include "sph_types.h" + +/** + * Output size (in bits) for PANAMA. + */ +#define SPH_SIZE_panama 256 + +/** + * This structure is a context for PANAMA computations: it contains the + * intermediate values and some data from the last entered block. Once + * a PANAMA computation has been performed, the context can be reused for + * another computation. + * + * The contents of this structure are private. A running PANAMA computation + * can be cloned by copying the context (e.g. with a simple + * memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char data[32]; /* first field, for alignment */ + unsigned data_ptr; + + sph_u32 buffer[32][8]; + unsigned buffer_ptr; + + sph_u32 state[17]; +#endif +} sph_panama_context; + +/** + * Initialize a PANAMA context. This process performs no memory allocation. + * + * @param cc the PANAMA context (pointer to a sph_panama_context) + */ +void sph_panama_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the PANAMA context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_panama(void *cc, const void *data, size_t len); + +/** + * Terminate the current PANAMA computation and output the result into the + * provided buffer. The destination buffer must be wide enough to + * accomodate the result (32 bytes). The context is automatically + * reinitialized. + * + * @param cc the PANAMA context + * @param dst the destination buffer + */ +void sph_panama_close(void *cc, void *dst); + +#endif diff --git a/sha3/sph_radiogatun.c b/sha3/sph_radiogatun.c new file mode 100644 index 000000000..888b028f9 --- /dev/null +++ b/sha3/sph_radiogatun.c @@ -0,0 +1,1003 @@ +/* $Id: radiogatun.c 226 2010-06-16 17:28:08Z tp $ */ +/* + * RadioGatun implementation. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_radiogatun.h" + +#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_RADIOGATUN +#define SPH_SMALL_FOOTPRINT_RADIOGATUN 1 +#endif + +/* ======================================================================= */ +/* + * The core macros. We want to unroll 13 successive rounds so that the + * belt rotation becomes pure routing, solved at compilation time, with + * no unnecessary copying. We also wish all state variables to be + * independant local variables, so that the C compiler becomes free to + * map these on registers at it sees fit. This requires some heavy + * preprocessor trickeries, including a full addition macro modulo 13. + * + * These macros are size-independent. Some macros must be defined before + * use: + * WT evaluates to the type for a word (32-bit or 64-bit) + * T truncates a value to the proper word size + * ROR(x, n) right rotation of a word x, with explicit modular + * reduction of the rotation count n by the word size + * INW(i, j) input word j (0, 1, or 2) of block i (0 to 12) + * + * For INW, the input buffer is pointed to by "buf" which has type + * "const unsigned char *". + */ + +#define MUL19(action) do { \ + action(0); \ + action(1); \ + action(2); \ + action(3); \ + action(4); \ + action(5); \ + action(6); \ + action(7); \ + action(8); \ + action(9); \ + action(10); \ + action(11); \ + action(12); \ + action(13); \ + action(14); \ + action(15); \ + action(16); \ + action(17); \ + action(18); \ + } while (0) + +#define DECL19(b) b ## 0, b ## 1, b ## 2, b ## 3, b ## 4, b ## 5, \ + b ## 6, b ## 7, b ## 8, b ## 9, b ## 10, b ## 11, \ + b ## 12, b ## 13, b ## 14, b ## 15, b ## 16, \ + b ## 17, b ## 18 + +#define M19_T7(i) M19_T7_(i) +#define M19_T7_(i) M19_T7_ ## i +#define M19_T7_0 0 +#define M19_T7_1 7 +#define M19_T7_2 14 +#define M19_T7_3 2 +#define M19_T7_4 9 +#define M19_T7_5 16 +#define M19_T7_6 4 +#define M19_T7_7 11 +#define M19_T7_8 18 +#define M19_T7_9 6 +#define M19_T7_10 13 +#define M19_T7_11 1 +#define M19_T7_12 8 +#define M19_T7_13 15 +#define M19_T7_14 3 +#define M19_T7_15 10 +#define M19_T7_16 17 +#define M19_T7_17 5 +#define M19_T7_18 12 + +#define M19_A1(i) M19_A1_(i) +#define M19_A1_(i) M19_A1_ ## i +#define M19_A1_0 1 +#define M19_A1_1 2 +#define M19_A1_2 3 +#define M19_A1_3 4 +#define M19_A1_4 5 +#define M19_A1_5 6 +#define M19_A1_6 7 +#define M19_A1_7 8 +#define M19_A1_8 9 +#define M19_A1_9 10 +#define M19_A1_10 11 +#define M19_A1_11 12 +#define M19_A1_12 13 +#define M19_A1_13 14 +#define M19_A1_14 15 +#define M19_A1_15 16 +#define M19_A1_16 17 +#define M19_A1_17 18 +#define M19_A1_18 0 + +#define M19_A2(i) M19_A2_(i) +#define M19_A2_(i) M19_A2_ ## i +#define M19_A2_0 2 +#define M19_A2_1 3 +#define M19_A2_2 4 +#define M19_A2_3 5 +#define M19_A2_4 6 +#define M19_A2_5 7 +#define M19_A2_6 8 +#define M19_A2_7 9 +#define M19_A2_8 10 +#define M19_A2_9 11 +#define M19_A2_10 12 +#define M19_A2_11 13 +#define M19_A2_12 14 +#define M19_A2_13 15 +#define M19_A2_14 16 +#define M19_A2_15 17 +#define M19_A2_16 18 +#define M19_A2_17 0 +#define M19_A2_18 1 + +#define M19_A4(i) M19_A4_(i) +#define M19_A4_(i) M19_A4_ ## i +#define M19_A4_0 4 +#define M19_A4_1 5 +#define M19_A4_2 6 +#define M19_A4_3 7 +#define M19_A4_4 8 +#define M19_A4_5 9 +#define M19_A4_6 10 +#define M19_A4_7 11 +#define M19_A4_8 12 +#define M19_A4_9 13 +#define M19_A4_10 14 +#define M19_A4_11 15 +#define M19_A4_12 16 +#define M19_A4_13 17 +#define M19_A4_14 18 +#define M19_A4_15 0 +#define M19_A4_16 1 +#define M19_A4_17 2 +#define M19_A4_18 3 + +#define ACC_a(i) ACC_a_(i) +#define ACC_a_(i) a ## i +#define ACC_atmp(i) ACC_atmp_(i) +#define ACC_atmp_(i) atmp ## i + +#define MILL1(i) (atmp ## i = a ## i ^ T(ACC_a(M19_A1(i)) \ + | ~ACC_a(M19_A2(i)))) +#define MILL2(i) (a ## i = ROR(ACC_atmp(M19_T7(i)), ((i * (i + 1)) >> 1))) +#define MILL3(i) (atmp ## i = a ## i ^ ACC_a(M19_A1(i)) ^ ACC_a(M19_A4(i))) +#define MILL4(i) (a ## i = atmp ## i ^ (i == 0)) + +#define MILL do { \ + WT DECL19(atmp); \ + MUL19(MILL1); \ + MUL19(MILL2); \ + MUL19(MILL3); \ + MUL19(MILL4); \ + } while (0) + +#define DECL13(b) b ## 0 ## _0, b ## 0 ## _1, b ## 0 ## _2, \ + b ## 1 ## _0, b ## 1 ## _1, b ## 1 ## _2, \ + b ## 2 ## _0, b ## 2 ## _1, b ## 2 ## _2, \ + b ## 3 ## _0, b ## 3 ## _1, b ## 3 ## _2, \ + b ## 4 ## _0, b ## 4 ## _1, b ## 4 ## _2, \ + b ## 5 ## _0, b ## 5 ## _1, b ## 5 ## _2, \ + b ## 6 ## _0, b ## 6 ## _1, b ## 6 ## _2, \ + b ## 7 ## _0, b ## 7 ## _1, b ## 7 ## _2, \ + b ## 8 ## _0, b ## 8 ## _1, b ## 8 ## _2, \ + b ## 9 ## _0, b ## 9 ## _1, b ## 9 ## _2, \ + b ## 10 ## _0, b ## 10 ## _1, b ## 10 ## _2, \ + b ## 11 ## _0, b ## 11 ## _1, b ## 11 ## _2, \ + b ## 12 ## _0, b ## 12 ## _1, b ## 12 ## _2 + +#define M13_A(i, j) M13_A_(i, j) +#define M13_A_(i, j) M13_A_ ## i ## _ ## j +#define M13_A_0_0 0 +#define M13_A_0_1 1 +#define M13_A_0_2 2 +#define M13_A_0_3 3 +#define M13_A_0_4 4 +#define M13_A_0_5 5 +#define M13_A_0_6 6 +#define M13_A_0_7 7 +#define M13_A_0_8 8 +#define M13_A_0_9 9 +#define M13_A_0_10 10 +#define M13_A_0_11 11 +#define M13_A_0_12 12 +#define M13_A_1_0 1 +#define M13_A_1_1 2 +#define M13_A_1_2 3 +#define M13_A_1_3 4 +#define M13_A_1_4 5 +#define M13_A_1_5 6 +#define M13_A_1_6 7 +#define M13_A_1_7 8 +#define M13_A_1_8 9 +#define M13_A_1_9 10 +#define M13_A_1_10 11 +#define M13_A_1_11 12 +#define M13_A_1_12 0 +#define M13_A_2_0 2 +#define M13_A_2_1 3 +#define M13_A_2_2 4 +#define M13_A_2_3 5 +#define M13_A_2_4 6 +#define M13_A_2_5 7 +#define M13_A_2_6 8 +#define M13_A_2_7 9 +#define M13_A_2_8 10 +#define M13_A_2_9 11 +#define M13_A_2_10 12 +#define M13_A_2_11 0 +#define M13_A_2_12 1 +#define M13_A_3_0 3 +#define M13_A_3_1 4 +#define M13_A_3_2 5 +#define M13_A_3_3 6 +#define M13_A_3_4 7 +#define M13_A_3_5 8 +#define M13_A_3_6 9 +#define M13_A_3_7 10 +#define M13_A_3_8 11 +#define M13_A_3_9 12 +#define M13_A_3_10 0 +#define M13_A_3_11 1 +#define M13_A_3_12 2 +#define M13_A_4_0 4 +#define M13_A_4_1 5 +#define M13_A_4_2 6 +#define M13_A_4_3 7 +#define M13_A_4_4 8 +#define M13_A_4_5 9 +#define M13_A_4_6 10 +#define M13_A_4_7 11 +#define M13_A_4_8 12 +#define M13_A_4_9 0 +#define M13_A_4_10 1 +#define M13_A_4_11 2 +#define M13_A_4_12 3 +#define M13_A_5_0 5 +#define M13_A_5_1 6 +#define M13_A_5_2 7 +#define M13_A_5_3 8 +#define M13_A_5_4 9 +#define M13_A_5_5 10 +#define M13_A_5_6 11 +#define M13_A_5_7 12 +#define M13_A_5_8 0 +#define M13_A_5_9 1 +#define M13_A_5_10 2 +#define M13_A_5_11 3 +#define M13_A_5_12 4 +#define M13_A_6_0 6 +#define M13_A_6_1 7 +#define M13_A_6_2 8 +#define M13_A_6_3 9 +#define M13_A_6_4 10 +#define M13_A_6_5 11 +#define M13_A_6_6 12 +#define M13_A_6_7 0 +#define M13_A_6_8 1 +#define M13_A_6_9 2 +#define M13_A_6_10 3 +#define M13_A_6_11 4 +#define M13_A_6_12 5 +#define M13_A_7_0 7 +#define M13_A_7_1 8 +#define M13_A_7_2 9 +#define M13_A_7_3 10 +#define M13_A_7_4 11 +#define M13_A_7_5 12 +#define M13_A_7_6 0 +#define M13_A_7_7 1 +#define M13_A_7_8 2 +#define M13_A_7_9 3 +#define M13_A_7_10 4 +#define M13_A_7_11 5 +#define M13_A_7_12 6 +#define M13_A_8_0 8 +#define M13_A_8_1 9 +#define M13_A_8_2 10 +#define M13_A_8_3 11 +#define M13_A_8_4 12 +#define M13_A_8_5 0 +#define M13_A_8_6 1 +#define M13_A_8_7 2 +#define M13_A_8_8 3 +#define M13_A_8_9 4 +#define M13_A_8_10 5 +#define M13_A_8_11 6 +#define M13_A_8_12 7 +#define M13_A_9_0 9 +#define M13_A_9_1 10 +#define M13_A_9_2 11 +#define M13_A_9_3 12 +#define M13_A_9_4 0 +#define M13_A_9_5 1 +#define M13_A_9_6 2 +#define M13_A_9_7 3 +#define M13_A_9_8 4 +#define M13_A_9_9 5 +#define M13_A_9_10 6 +#define M13_A_9_11 7 +#define M13_A_9_12 8 +#define M13_A_10_0 10 +#define M13_A_10_1 11 +#define M13_A_10_2 12 +#define M13_A_10_3 0 +#define M13_A_10_4 1 +#define M13_A_10_5 2 +#define M13_A_10_6 3 +#define M13_A_10_7 4 +#define M13_A_10_8 5 +#define M13_A_10_9 6 +#define M13_A_10_10 7 +#define M13_A_10_11 8 +#define M13_A_10_12 9 +#define M13_A_11_0 11 +#define M13_A_11_1 12 +#define M13_A_11_2 0 +#define M13_A_11_3 1 +#define M13_A_11_4 2 +#define M13_A_11_5 3 +#define M13_A_11_6 4 +#define M13_A_11_7 5 +#define M13_A_11_8 6 +#define M13_A_11_9 7 +#define M13_A_11_10 8 +#define M13_A_11_11 9 +#define M13_A_11_12 10 +#define M13_A_12_0 12 +#define M13_A_12_1 0 +#define M13_A_12_2 1 +#define M13_A_12_3 2 +#define M13_A_12_4 3 +#define M13_A_12_5 4 +#define M13_A_12_6 5 +#define M13_A_12_7 6 +#define M13_A_12_8 7 +#define M13_A_12_9 8 +#define M13_A_12_10 9 +#define M13_A_12_11 10 +#define M13_A_12_12 11 + +#define M13_N(i) M13_N_(i) +#define M13_N_(i) M13_N_ ## i +#define M13_N_0 12 +#define M13_N_1 11 +#define M13_N_2 10 +#define M13_N_3 9 +#define M13_N_4 8 +#define M13_N_5 7 +#define M13_N_6 6 +#define M13_N_7 5 +#define M13_N_8 4 +#define M13_N_9 3 +#define M13_N_10 2 +#define M13_N_11 1 +#define M13_N_12 0 + +#define ACC_b(i, k) ACC_b_(i, k) +#define ACC_b_(i, k) b ## i ## _ ## k + +#define ROUND_ELT(k, s) do { \ + if ((bj += 3) == 39) \ + bj = 0; \ + sc->b[bj + s] ^= a ## k; \ + } while (0) + +#define ROUND_SF(j) do { \ + size_t bj = (j) * 3; \ + ROUND_ELT(1, 0); \ + ROUND_ELT(2, 1); \ + ROUND_ELT(3, 2); \ + ROUND_ELT(4, 0); \ + ROUND_ELT(5, 1); \ + ROUND_ELT(6, 2); \ + ROUND_ELT(7, 0); \ + ROUND_ELT(8, 1); \ + ROUND_ELT(9, 2); \ + ROUND_ELT(10, 0); \ + ROUND_ELT(11, 1); \ + ROUND_ELT(12, 2); \ + MILL; \ + bj = (j) * 3; \ + a ## 13 ^= sc->b[bj + 0]; \ + a ## 14 ^= sc->b[bj + 1]; \ + a ## 15 ^= sc->b[bj + 2]; \ + } while (0) + +#define INPUT_SF(j, p0, p1, p2) do { \ + size_t bj = ((j) + 1) * 3; \ + if (bj == 39) \ + bj = 0; \ + sc->b[bj + 0] ^= (p0); \ + sc->b[bj + 1] ^= (p1); \ + sc->b[bj + 2] ^= (p2); \ + a16 ^= (p0); \ + a17 ^= (p1); \ + a18 ^= (p2); \ + } while (0) + + +#if SPH_SMALL_FOOTPRINT_RADIOGATUN + +#define ROUND ROUND_SF +#define INPUT INPUT_SF + +#else + +/* + * Round function R, on base j. The value j is such that B[0] is actually + * b[j] after the initial rotation. On the 13-round macro, j has the + * successive values 12, 11, 10... 1, 0. + */ +#define ROUND(j) do { \ + ACC_b(M13_A(1, j), 0) ^= a ## 1; \ + ACC_b(M13_A(2, j), 1) ^= a ## 2; \ + ACC_b(M13_A(3, j), 2) ^= a ## 3; \ + ACC_b(M13_A(4, j), 0) ^= a ## 4; \ + ACC_b(M13_A(5, j), 1) ^= a ## 5; \ + ACC_b(M13_A(6, j), 2) ^= a ## 6; \ + ACC_b(M13_A(7, j), 0) ^= a ## 7; \ + ACC_b(M13_A(8, j), 1) ^= a ## 8; \ + ACC_b(M13_A(9, j), 2) ^= a ## 9; \ + ACC_b(M13_A(10, j), 0) ^= a ## 10; \ + ACC_b(M13_A(11, j), 1) ^= a ## 11; \ + ACC_b(M13_A(12, j), 2) ^= a ## 12; \ + MILL; \ + a ## 13 ^= ACC_b(j, 0); \ + a ## 14 ^= ACC_b(j, 1); \ + a ## 15 ^= ACC_b(j, 2); \ + } while (0) + +#define INPUT(j, p0, p1, p2) do { \ + ACC_b(M13_A(1, j), 0) ^= (p0); \ + ACC_b(M13_A(1, j), 1) ^= (p1); \ + ACC_b(M13_A(1, j), 2) ^= (p2); \ + a16 ^= (p0); \ + a17 ^= (p1); \ + a18 ^= (p2); \ + } while (0) + +#endif + +#define MUL13(action) do { \ + action(0); \ + action(1); \ + action(2); \ + action(3); \ + action(4); \ + action(5); \ + action(6); \ + action(7); \ + action(8); \ + action(9); \ + action(10); \ + action(11); \ + action(12); \ + } while (0) + +#define MILL_READ_ELT(i) do { \ + a ## i = sc->a[i]; \ + } while (0) + +#define MILL_WRITE_ELT(i) do { \ + sc->a[i] = a ## i; \ + } while (0) + +#define STATE_READ_SF do { \ + MUL19(MILL_READ_ELT); \ + } while (0) + +#define STATE_WRITE_SF do { \ + MUL19(MILL_WRITE_ELT); \ + } while (0) + +#define PUSH13_SF do { \ + WT DECL19(a); \ + const unsigned char *buf; \ + \ + buf = data; \ + STATE_READ_SF; \ + while (len >= sizeof sc->data) { \ + size_t mk; \ + for (mk = 13; mk > 0; mk --) { \ + WT p0 = INW(0, 0); \ + WT p1 = INW(0, 1); \ + WT p2 = INW(0, 2); \ + INPUT_SF(mk - 1, p0, p1, p2); \ + ROUND_SF(mk - 1); \ + buf += (sizeof sc->data) / 13; \ + len -= (sizeof sc->data) / 13; \ + } \ + } \ + STATE_WRITE_SF; \ + return len; \ + } while (0) + +#if SPH_SMALL_FOOTPRINT_RADIOGATUN + +#define STATE_READ STATE_READ_SF +#define STATE_WRITE STATE_WRITE_SF +#define PUSH13 PUSH13_SF + +#else + +#define BELT_READ_ELT(i) do { \ + b ## i ## _0 = sc->b[3 * i + 0]; \ + b ## i ## _1 = sc->b[3 * i + 1]; \ + b ## i ## _2 = sc->b[3 * i + 2]; \ + } while (0) + +#define BELT_WRITE_ELT(i) do { \ + sc->b[3 * i + 0] = b ## i ## _0; \ + sc->b[3 * i + 1] = b ## i ## _1; \ + sc->b[3 * i + 2] = b ## i ## _2; \ + } while (0) + +#define STATE_READ do { \ + MUL13(BELT_READ_ELT); \ + MUL19(MILL_READ_ELT); \ + } while (0) + +#define STATE_WRITE do { \ + MUL13(BELT_WRITE_ELT); \ + MUL19(MILL_WRITE_ELT); \ + } while (0) + +/* + * Input data by chunks of 13*3 blocks. This is the body of the + * radiogatun32_push13() and radiogatun64_push13() functions. + */ +#define PUSH13 do { \ + WT DECL19(a), DECL13(b); \ + const unsigned char *buf; \ + \ + buf = data; \ + STATE_READ; \ + while (len >= sizeof sc->data) { \ + WT p0, p1, p2; \ + MUL13(PUSH13_ELT); \ + buf += sizeof sc->data; \ + len -= sizeof sc->data; \ + } \ + STATE_WRITE; \ + return len; \ + } while (0) + +#define PUSH13_ELT(k) do { \ + p0 = INW(k, 0); \ + p1 = INW(k, 1); \ + p2 = INW(k, 2); \ + INPUT(M13_N(k), p0, p1, p2); \ + ROUND(M13_N(k)); \ + } while (0) + +#endif + +#define BLANK13_SF do { \ + size_t mk = 13; \ + while (mk -- > 0) \ + ROUND_SF(mk); \ + } while (0) + +#define BLANK1_SF do { \ + WT tmp0, tmp1, tmp2; \ + ROUND_SF(12); \ + tmp0 = sc->b[36]; \ + tmp1 = sc->b[37]; \ + tmp2 = sc->b[38]; \ + memmove(sc->b + 3, sc->b, 36 * sizeof sc->b[0]); \ + sc->b[0] = tmp0; \ + sc->b[1] = tmp1; \ + sc->b[2] = tmp2; \ + } while (0) + +#if SPH_SMALL_FOOTPRINT_RADIOGATUN + +#define BLANK13 BLANK13_SF +#define BLANK1 BLANK1_SF + +#else + +/* + * Run 13 blank rounds. This macro expects the "a" and "b" state variables + * to be alread declared. + */ +#define BLANK13 MUL13(BLANK13_ELT) + +#define BLANK13_ELT(k) ROUND(M13_N(k)) + +#define MUL12(action) do { \ + action(0); \ + action(1); \ + action(2); \ + action(3); \ + action(4); \ + action(5); \ + action(6); \ + action(7); \ + action(8); \ + action(9); \ + action(10); \ + action(11); \ + } while (0) + +/* + * Run a single blank round, and physically rotate the belt. This is used + * for the last blank rounds, and the output rounds. This macro expects the + * "a" abd "b" state variables to be already declared. + */ +#define BLANK1 do { \ + WT tmp0, tmp1, tmp2; \ + ROUND(12); \ + tmp0 = b0_0; \ + tmp1 = b0_1; \ + tmp2 = b0_2; \ + MUL12(BLANK1_ELT); \ + b1_0 = tmp0; \ + b1_1 = tmp1; \ + b1_2 = tmp2; \ + } while (0) + +#define BLANK1_ELT(i) do { \ + ACC_b(M13_A(M13_N(i), 1), 0) = ACC_b(M13_N(i), 0); \ + ACC_b(M13_A(M13_N(i), 1), 1) = ACC_b(M13_N(i), 1); \ + ACC_b(M13_A(M13_N(i), 1), 2) = ACC_b(M13_N(i), 2); \ + } while (0) + +#endif + +#define NO_TOKEN + +/* + * Perform padding, then blank rounds, then output some words. This is + * the body of sph_radiogatun32_close() and sph_radiogatun64_close(). + */ +#define CLOSE_SF(width) CLOSE_GEN(width, \ + NO_TOKEN, STATE_READ_SF, BLANK1_SF, BLANK13_SF) + +#if SPH_SMALL_FOOTPRINT_RADIOGATUN +#define CLOSE CLOSE_SF +#else +#define CLOSE(width) CLOSE_GEN(width, \ + WT DECL13(b);, STATE_READ, BLANK1, BLANK13) +#endif + +#define CLOSE_GEN(width, WTb13, state_read, blank1, blank13) do { \ + unsigned ptr, num; \ + unsigned char *out; \ + WT DECL19(a); \ + WTb13 \ + \ + ptr = sc->data_ptr; \ + sc->data[ptr ++] = 0x01; \ + memset(sc->data + ptr, 0, (sizeof sc->data) - ptr); \ + radiogatun ## width ## _push13(sc, sc->data, sizeof sc->data); \ + \ + num = 17; \ + for (;;) { \ + ptr += 3 * (width >> 3); \ + if (ptr > sizeof sc->data) \ + break; \ + num --; \ + } \ + \ + state_read; \ + if (num >= 13) { \ + blank13; \ + num -= 13; \ + } \ + while (num -- > 0) \ + blank1; \ + \ + num = 0; \ + out = dst; \ + for (;;) { \ + OUTW(out, a1); \ + out += width >> 3; \ + OUTW(out, a2); \ + out += width >> 3; \ + num += 2 * (width >> 3); \ + if (num >= 32) \ + break; \ + blank1; \ + } \ + INIT; \ + } while (0) + +/* + * Initialize context structure. + */ +#if SPH_LITTLE_ENDIAN || SPH_BIG_ENDIAN + +#define INIT do { \ + memset(sc->a, 0, sizeof sc->a); \ + memset(sc->b, 0, sizeof sc->b); \ + sc->data_ptr = 0; \ + } while (0) + +#else + +#define INIT do { \ + size_t u; \ + for (u = 0; u < 19; u ++) \ + sc->a[u] = 0; \ + for (u = 0; u < 39; u ++) \ + sc->b[u] = 0; \ + sc->data_ptr = 0; \ + } while (0) + +#endif + +/* ======================================================================= */ +/* + * RadioGatun[32]. + */ + +#if !SPH_NO_RG32 + +#undef WT +#define WT sph_u32 +#undef T +#define T SPH_T32 +#undef ROR +#define ROR(x, n) SPH_T32(((x) << ((32 - (n)) & 31)) | ((x) >> ((n) & 31))) +#undef INW +#define INW(i, j) sph_dec32le_aligned(buf + (4 * (3 * (i) + (j)))) +#undef OUTW +#define OUTW(b, v) sph_enc32le(b, v) + +/* + * Insert data by big chunks of 13*12 = 156 bytes. Returned value is the + * number of remaining bytes (between 0 and 155). This method assumes that + * the input data is suitably aligned. + */ +static size_t +radiogatun32_push13(sph_radiogatun32_context *sc, const void *data, size_t len) +{ + PUSH13; +} + +/* see sph_radiogatun.h */ +void +sph_radiogatun32_init(void *cc) +{ + sph_radiogatun32_context *sc; + + sc = cc; + INIT; +} + +#ifdef SPH_UPTR +static void +radiogatun32_short(void *cc, const void *data, size_t len) +#else +/* see sph_radiogatun.h */ +void +sph_radiogatun32(void *cc, const void *data, size_t len) +#endif +{ + sph_radiogatun32_context *sc; + unsigned ptr; + + sc = cc; + ptr = sc->data_ptr; + while (len > 0) { + size_t clen; + + clen = (sizeof sc->data) - ptr; + if (clen > len) + clen = len; + memcpy(sc->data + ptr, data, clen); + data = (const unsigned char *)data + clen; + len -= clen; + ptr += clen; + if (ptr == sizeof sc->data) { + radiogatun32_push13(sc, sc->data, sizeof sc->data); + ptr = 0; + } + } + sc->data_ptr = ptr; +} + +#ifdef SPH_UPTR +/* see sph_radiogatun.h */ +void +sph_radiogatun32(void *cc, const void *data, size_t len) +{ + sph_radiogatun32_context *sc; + unsigned ptr; + size_t rlen; + + if (len < (2 * sizeof sc->data)) { + radiogatun32_short(cc, data, len); + return; + } + sc = cc; + ptr = sc->data_ptr; + if (ptr > 0) { + unsigned t; + + t = (sizeof sc->data) - ptr; + radiogatun32_short(sc, data, t); + data = (const unsigned char *)data + t; + len -= t; + } +#if !SPH_UNALIGNED + if (((SPH_UPTR)data & 3) != 0) { + radiogatun32_short(sc, data, len); + return; + } +#endif + rlen = radiogatun32_push13(sc, data, len); + memcpy(sc->data, (const unsigned char *)data + len - rlen, rlen); + sc->data_ptr = rlen; +} +#endif + +/* see sph_radiogatun.h */ +void +sph_radiogatun32_close(void *cc, void *dst) +{ + sph_radiogatun32_context *sc; + + sc = cc; + CLOSE(32); +} + +#endif + +/* ======================================================================= */ +/* + * RadioGatun[64]. Compiled only if a 64-bit or more type is available. + */ + +#if SPH_64 + +#if !SPH_NO_RG64 + +#undef WT +#define WT sph_u64 +#undef T +#define T SPH_T64 +#undef ROR +#define ROR(x, n) SPH_T64(((x) << ((64 - (n)) & 63)) | ((x) >> ((n) & 63))) +#undef INW +#define INW(i, j) sph_dec64le_aligned(buf + (8 * (3 * (i) + (j)))) +#undef OUTW +#define OUTW(b, v) sph_enc64le(b, v) + +/* + * On 32-bit x86, register pressure is such that using the small + * footprint version is a net gain (x2 speed), because that variant + * uses fewer local variables. + */ +#if SPH_I386_MSVC || SPH_I386_GCC || defined __i386__ +#undef PUSH13 +#define PUSH13 PUSH13_SF +#undef CLOSE +#define CLOSE CLOSE_SF +#endif + +/* + * Insert data by big chunks of 13*24 = 312 bytes. Returned value is the + * number of remaining bytes (between 0 and 311). This method assumes that + * the input data is suitably aligned. + */ +static size_t +radiogatun64_push13(sph_radiogatun64_context *sc, const void *data, size_t len) +{ + PUSH13; +} + +/* see sph_radiogatun.h */ +void +sph_radiogatun64_init(void *cc) +{ + sph_radiogatun64_context *sc; + + sc = cc; + INIT; +} + +#ifdef SPH_UPTR +static void +radiogatun64_short(void *cc, const void *data, size_t len) +#else +/* see sph_radiogatun.h */ +void +sph_radiogatun64(void *cc, const void *data, size_t len) +#endif +{ + sph_radiogatun64_context *sc; + unsigned ptr; + + sc = cc; + ptr = sc->data_ptr; + while (len > 0) { + size_t clen; + + clen = (sizeof sc->data) - ptr; + if (clen > len) + clen = len; + memcpy(sc->data + ptr, data, clen); + data = (const unsigned char *)data + clen; + len -= clen; + ptr += clen; + if (ptr == sizeof sc->data) { + radiogatun64_push13(sc, sc->data, sizeof sc->data); + ptr = 0; + } + } + sc->data_ptr = ptr; +} + +#ifdef SPH_UPTR +/* see sph_radiogatun.h */ +void +sph_radiogatun64(void *cc, const void *data, size_t len) +{ + sph_radiogatun64_context *sc; + unsigned ptr; + size_t rlen; + + if (len < (2 * sizeof sc->data)) { + radiogatun64_short(cc, data, len); + return; + } + sc = cc; + ptr = sc->data_ptr; + if (ptr > 0) { + unsigned t; + + t = (sizeof sc->data) - ptr; + radiogatun64_short(sc, data, t); + data = (const unsigned char *)data + t; + len -= t; + } +#if !SPH_UNALIGNED + if (((SPH_UPTR)data & 7) != 0) { + radiogatun64_short(sc, data, len); + return; + } +#endif + rlen = radiogatun64_push13(sc, data, len); + memcpy(sc->data, (const unsigned char *)data + len - rlen, rlen); + sc->data_ptr = rlen; +} +#endif + +/* see sph_radiogatun.h */ +void +sph_radiogatun64_close(void *cc, void *dst) +{ + sph_radiogatun64_context *sc; + + sc = cc; + CLOSE(64); +} + +#endif + +#endif diff --git a/sha3/sph_radiogatun.h b/sha3/sph_radiogatun.h new file mode 100644 index 000000000..763839771 --- /dev/null +++ b/sha3/sph_radiogatun.h @@ -0,0 +1,186 @@ +/* $Id: sph_radiogatun.h 226 2010-06-16 17:28:08Z tp $ */ +/** + * RadioGatun interface. + * + * RadioGatun has been published in: G. Bertoni, J. Daemen, M. Peeters + * and G. Van Assche, "RadioGatun, a belt-and-mill hash function", + * presented at the Second Cryptographic Hash Workshop, Santa Barbara, + * August 24-25, 2006. The main Web site, containing that article, the + * reference code and some test vectors, appears to be currently located + * at the following URL: http://radiogatun.noekeon.org/ + * + * The presentation article does not specify endianness or padding. The + * reference code uses the following conventions, which we also apply + * here: + *
    + *
  • The input message is an integral number of sequences of three + * words. Each word is either a 32-bit of 64-bit word (depending on + * the version of RadioGatun).
  • + *
  • Input bytes are decoded into words using little-endian + * convention.
  • + *
  • Padding consists of a single bit of value 1, using little-endian + * convention within bytes (i.e. for a byte-oriented input, a single + * byte of value 0x01 is appended), then enough bits of value 0 to finish + * the current block.
  • + *
  • Output consists of 256 bits. Successive output words are encoded + * with little-endian convention.
  • + *
+ * These conventions are very close to those we use for PANAMA, which is + * a close ancestor or RadioGatun. + * + * RadioGatun is actually a family of functions, depending on some + * internal parameters. We implement here two functions, with a "belt + * length" of 13, a "belt width" of 3, and a "mill length" of 19. The + * RadioGatun[32] version uses 32-bit words, while the RadioGatun[64] + * variant uses 64-bit words. + * + * Strictly speaking, the name "RadioGatun" should use an acute accent + * on the "u", which we omitted here to keep strict ASCII-compatibility + * of this file. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @file sph_radiogatun.h + * @author Thomas Pornin + */ + +#ifndef SPH_RADIOGATUN_H__ +#define SPH_RADIOGATUN_H__ + +#include +#include "sph_types.h" + +/** + * Output size (in bits) for RadioGatun[32]. + */ +#define SPH_SIZE_radiogatun32 256 + +/** + * This structure is a context for RadioGatun[32] computations: it + * contains intermediate values and some data from the last entered + * block. Once a RadioGatun[32] computation has been performed, the + * context can be reused for another computation. + * + * The contents of this structure are private. A running RadioGatun[32] + * computation can be cloned by copying the context (e.g. with a + * simple memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char data[156]; /* first field, for alignment */ + unsigned data_ptr; + sph_u32 a[19], b[39]; +#endif +} sph_radiogatun32_context; + +/** + * Initialize a RadioGatun[32] context. This process performs no + * memory allocation. + * + * @param cc the RadioGatun[32] context (pointer to a + * sph_radiogatun32_context) + */ +void sph_radiogatun32_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the RadioGatun[32] context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_radiogatun32(void *cc, const void *data, size_t len); + +/** + * Terminate the current RadioGatun[32] computation and output the + * result into the provided buffer. The destination buffer must be wide + * enough to accomodate the result (32 bytes). The context is + * automatically reinitialized. + * + * @param cc the RadioGatun[32] context + * @param dst the destination buffer + */ +void sph_radiogatun32_close(void *cc, void *dst); + +#if SPH_64 + +/** + * Output size (in bits) for RadioGatun[64]. + */ +#define SPH_SIZE_radiogatun64 256 + +/** + * This structure is a context for RadioGatun[64] computations: it + * contains intermediate values and some data from the last entered + * block. Once a RadioGatun[64] computation has been performed, the + * context can be reused for another computation. + * + * The contents of this structure are private. A running RadioGatun[64] + * computation can be cloned by copying the context (e.g. with a + * simple memcpy()). + */ +typedef struct { +#ifndef DOXYGEN_IGNORE + unsigned char data[312]; /* first field, for alignment */ + unsigned data_ptr; + sph_u64 a[19], b[39]; +#endif +} sph_radiogatun64_context; + +/** + * Initialize a RadioGatun[64] context. This process performs no + * memory allocation. + * + * @param cc the RadioGatun[64] context (pointer to a + * sph_radiogatun64_context) + */ +void sph_radiogatun64_init(void *cc); + +/** + * Process some data bytes. It is acceptable that len is zero + * (in which case this function does nothing). + * + * @param cc the RadioGatun[64] context + * @param data the input data + * @param len the input data length (in bytes) + */ +void sph_radiogatun64(void *cc, const void *data, size_t len); + +/** + * Terminate the current RadioGatun[64] computation and output the + * result into the provided buffer. The destination buffer must be wide + * enough to accomodate the result (32 bytes). The context is + * automatically reinitialized. + * + * @param cc the RadioGatun[64] context + * @param dst the destination buffer + */ +void sph_radiogatun64_close(void *cc, void *dst); + +#endif + +#endif \ No newline at end of file diff --git a/util.c b/util.c index e28f2c9bd..0bdbf7543 100644 --- a/util.c +++ b/util.c @@ -2496,6 +2496,9 @@ void print_hash_tests(void) x17hash(&hash[0], &buf[0]); printpfx("x17", hash); + x20r_hash(&hash[0], &buf[0]); + printpfx("x20r", hash); + yescrypthash(&hash[0], &buf[0]); printpfx("yescrypt", hash);