-
Notifications
You must be signed in to change notification settings - Fork 2
/
thash_shake_robustx2.c
173 lines (144 loc) · 6.2 KB
/
thash_shake_robustx2.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#include <stdint.h>
#include <string.h>
#include "thash.h"
#include "thashx2.h"
#include "address.h"
#include "params.h"
#include "f1600x2.h"
#include "fips202x2.h"
/**
 * Single-instance thash, implemented on top of the 2-way version.
 *
 * The 8-word address is replicated into both halves of a 16-word buffer and
 * thashx2 is invoked with both lanes pointing at the same input and the same
 * output, so the two lanes perform identical work and write identical bytes
 * to out.
 */
void thash(unsigned char *out,
           const unsigned char *in,
           unsigned int inblocks,
           const spx_ctx *ctx, uint32_t addr[8])
{
    uint32_t addrx2[2*8];
    const size_t addr_bytes = 8 * sizeof(uint32_t);

    /* Lane 0 address followed by an identical lane 1 address. */
    memcpy(addrx2, addr, addr_bytes);
    memcpy(addrx2 + 8, addr, addr_bytes);

    thashx2(out, out, in, in, inblocks, ctx, addrx2);
}
/**
 * 2-way parallel version of thash; takes 2x as much input and output.
 *
 * For each lane k in {0, 1} this first derives a bitmask by hashing
 * pub_seed || addr_k, then hashes pub_seed || addr_k || (in_k XOR bitmask)
 * and writes SPX_N bytes of the result to out_k.
 *
 * out0, out1: receive SPX_N bytes each.
 * in0, in1:   inblocks * SPX_N bytes are read from each.
 * inblocks:   number of SPX_N-byte input blocks per lane.
 * ctx:        only ctx->pub_seed (SPX_N bytes) is read here.
 * addrx2:     lane 0 address in words 0..7, lane 1 address in words 8..15.
 *
 * For inblocks of 1 or 2 the interleaved Keccak state is built by hand:
 * state[2*i] is 64-bit lane i of instance 0 and state[2*i+1] is lane i of
 * instance 1. All other cases go through the generic shake256x2 routine.
 * The hand-built paths divide SPX_N by 8, so they presumably rely on SPX_N
 * being a multiple of 8 -- confirm against params.h.
 */
void thashx2(unsigned char *out0,
             unsigned char *out1,
             const unsigned char *in0,
             const unsigned char *in1,
             unsigned int inblocks,
             const spx_ctx *ctx, uint32_t addrx2[2*8])
{
    if (SPX_N <= 32 && (inblocks == 1 || inblocks == 2)) {
        /* As we write and read only a few quadwords, it is more efficient to
         * build and extract from the twoway SHAKE256 state by hand. */
        uint64_t state[50] = {0};
        uint64_t state2[50];

        /* Both instances absorb the same public seed. */
        for (int i = 0; i < SPX_N/8; i++) {
            uint64_t x = load64(ctx->pub_seed + 8*i);
            state[2*i] = x;
            state[2*i+1] = x;
        }
        /* Pack each pair of 32-bit address words into one 64-bit lane,
         * low word first. */
        for (int i = 0; i < 4; i++) {
            state[2*(SPX_N/8 + i)] = (((uint64_t)addrx2[1+2*i]) << 32)
                                     | (uint64_t)addrx2[2*i];
            state[2*(SPX_N/8 + i) + 1] = (((uint64_t)addrx2[8+1+2*i]) << 32)
                                         | (uint64_t)addrx2[8+2*i];
        }

        /* Domain separator and padding. The constant is unsigned: shifting
         * a signed 0x80 into bit 63 would be undefined behaviour. */
        state[2*16] = 0x80ull << 56;
        state[2*16+1] = 0x80ull << 56;
        state[2*((SPX_N/8)+4)] ^= 0x1f;
        state[2*((SPX_N/8)+4)+1] ^= 0x1f;

        /* We will permute state with f1600x2 to compute the bitmask,
         * but first we copy it to state2, which will be used to compute
         * the final output, as its input is almost identical. */
        memcpy(state2, state, sizeof state2);

        f1600x2(state);

        /* By copying from state, state2 already contains the pub_seed
         * and address. We just need to copy in the input blocks XORed with
         * the bitmask we just computed. The first store overwrites the
         * 0x1f separator inherited from the bitmask input. */
        for (unsigned int i = 0; i < (SPX_N/8) * inblocks; i++) {
            state2[2*(SPX_N/8+4+i)] = state[2*i] ^ load64(in0 + 8*i);
            state2[2*(SPX_N/8+4+i)+1] = state[2*i+1] ^ load64(in1 + 8*i);
        }

        /* Domain separator and start of padding. Note that the quadwords
         * around are already zeroed in state, from which we copied.
         * We do a XOR instead of a set as this might be the 16th quadword
         * when N=32 and inblocks=2, which already contains the end
         * of the padding. */
        state2[2*((SPX_N/8)*(1+inblocks)+4)] ^= 0x1f;
        state2[2*((SPX_N/8)*(1+inblocks)+4)+1] ^= 0x1f;

        f1600x2(state2);

        for (int i = 0; i < SPX_N/8; i++) {
            store64(out0 + 8*i, state2[2*i]);
            store64(out1 + 8*i, state2[2*i+1]);
        }
    } else if (SPX_N == 64 && (inblocks == 1 || inblocks == 2)) {
        /* As we write and read only a few quadwords, it is more efficient to
         * build and extract from the twoway SHAKE256 state by hand. */
        uint64_t state[50] = {0};
        uint64_t state2[50];
        /* Quadwords of masked input left over for the second absorb. */
        const unsigned int tail = 3 + 8*(inblocks-1);

        /* Both instances absorb the same public seed. */
        for (int i = 0; i < SPX_N/8; i++) {
            uint64_t x = load64(ctx->pub_seed + 8*i);
            state[2*i] = x;
            state[2*i+1] = x;
        }
        /* Pack each pair of 32-bit address words into one 64-bit lane,
         * low word first. */
        for (int i = 0; i < 4; i++) {
            state[2*(SPX_N/8 + i)] = (((uint64_t)addrx2[1+2*i]) << 32)
                                     | (uint64_t)addrx2[2*i];
            state[2*(SPX_N/8 + i) + 1] = (((uint64_t)addrx2[8+1+2*i]) << 32)
                                         | (uint64_t)addrx2[8+2*i];
        }

        /* Domain separator and padding. The separator goes in the quadword
         * right after pub_seed (SPX_N/8 quadwords) and the address
         * (4 quadwords). */
        state[2*16] = 0x80ull << 56;
        state[2*16+1] = 0x80ull << 56;
        state[2*((SPX_N/8)+4)] ^= 0x1f;
        state[2*((SPX_N/8)+4)+1] ^= 0x1f;

        /* We will permute state with f1600x2 to compute the bitmask,
         * but first we copy it to state2, which will be used to compute
         * the final output, as its input is almost identical. */
        memcpy(state2, state, sizeof state2);

        f1600x2(state);

        /* We won't be able to fit all input in one go.
         * By copying from state, state2 already contains the pub_seed
         * and address. We just need to copy in the input blocks XORed with
         * the bitmask we just computed. Quadwords 12..16 are set rather
         * than XORed, which also overwrites the separator and padding
         * inherited from the bitmask input. */
        for (int i = 0; i < 5; i++) {
            state2[2*(8+4+i)] = state[2*i] ^ load64(in0 + 8*i);
            state2[2*(8+4+i)+1] = state[2*i+1] ^ load64(in1 + 8*i);
        }

        f1600x2(state2);

        /* Final input: absorb the remaining masked quadwords by XORing
         * them into the start of the permuted state. */
        for (unsigned int i = 0; i < tail; i++) {
            state2[2*i] = state2[2*i] ^ state[2*(i+5)] ^ load64(in0 + 8*(i+5));
            state2[2*i+1] = state2[2*i+1] ^ state[2*(i+5)+1]
                            ^ load64(in1 + 8*(i+5));
        }

        /* Domain separator and padding, applied to both instances. */
        state2[2*tail] ^= 0x1f;
        state2[2*tail+1] ^= 0x1f;
        state2[2*16] ^= 0x80ull << 56;
        state2[2*16+1] ^= 0x80ull << 56;

        f1600x2(state2);

        for (int i = 0; i < 8; i++) {
            store64(out0 + 8*i, state2[2*i]);
            store64(out1 + 8*i, state2[2*i+1]);
        }
    } else {
        /* Generic path: assemble the byte strings and use shake256x2. */
        unsigned char buf0[SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N];
        unsigned char buf1[SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N];
        unsigned char bitmask0[inblocks * SPX_N];
        unsigned char bitmask1[inblocks * SPX_N];
        unsigned int i;

        memcpy(buf0, ctx->pub_seed, SPX_N);
        memcpy(buf1, ctx->pub_seed, SPX_N);
        memcpy(buf0 + SPX_N, addrx2 + 0*8, SPX_ADDR_BYTES);
        memcpy(buf1 + SPX_N, addrx2 + 1*8, SPX_ADDR_BYTES);

        /* The bitmask is derived from pub_seed || addr only. */
        shake256x2(bitmask0, bitmask1, inblocks * SPX_N,
                   buf0, buf1, SPX_N + SPX_ADDR_BYTES);

        for (i = 0; i < inblocks * SPX_N; i++) {
            buf0[SPX_N + SPX_ADDR_BYTES + i] = in0[i] ^ bitmask0[i];
            buf1[SPX_N + SPX_ADDR_BYTES + i] = in1[i] ^ bitmask1[i];
        }

        shake256x2(out0, out1, SPX_N,
                   buf0, buf1, SPX_N + SPX_ADDR_BYTES + inblocks*SPX_N);
    }
}