Skip to content

Commit

Permalink
Improve Dilithium (speed) verification stack usage (#346)
Browse files Browse the repository at this point in the history
* Improve Dilithium (speed) verification stack usage

Once upon a time, we wrote a paper on memory-efficient Dilithium [1]
which included a speed-optimized version of verification that still
included some memory optimizations that don't come at a performance
penalty.

Unfortunately, with the update of the reference code to round 3, that
version did not get migrated, leading to some complaints about
verification memory consumption.

I finally found some time to port these.
Verification speed is essentially unchanged, but stack consumption is
much better.

[1] https://eprint.iacr.org/2020/1278.pdf

* update benchmarks
  • Loading branch information
mkannwischer authored Aug 6, 2024
1 parent cda61fb commit f2b698a
Show file tree
Hide file tree
Showing 9 changed files with 289 additions and 64 deletions.
24 changes: 12 additions & 12 deletions benchmarks.csv
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ cross-sha3-r-sdpg-1-small (10 executions),ref,290135,287741,297757,102853622,102
cross-sha3-r-sdpg-3-fast (10 executions),ref,627948,625525,637639,43573841,43565461,43582933,27513830,27493024,27525746
cross-sha3-r-sdpg-5-fast (10 executions),ref,1146280,1142409,1153794,93557878,93547167,93566329,59948216,59857434,60043852
dilithium2 (1000 executions),clean,1874167,1827645,1914566,7493877,3321630,40762756,2062795,2062255,2063222
dilithium2 (1000 executions),m4f,1426036,1379636,1466394,3807970,1813656,18528070,1417745,1417203,1418192
dilithium2 (1000 executions),m4f,1425723,1379410,1466445,3835095,1813682,16068642,1421307,1420219,1422056
dilithium2 (1000 executions),m4fstack,1801523,1684895,1902114,12170976,3900911,86281518,3241353,3194028,3281144
dilithium3 (1000 executions),clean,3205551,3204090,3207411,12696585,5097364,74392293,3376992,3376581,3377393
dilithium3 (1000 executions),m4f,2515969,2514498,2517634,5884832,2917322,25268693,2411257,2410858,2411717
dilithium3 (1000 executions),m4f,2515915,2514307,2517413,6054094,2917316,27829552,2415526,2414696,2416440
dilithium3 (1000 executions),m4fstack,3412759,3406659,3419247,23673016,6733971,145803146,5733307,5688893,5778120
dilithium5 (1000 executions),clean,5341477,5286872,5395822,15710371,7953367,75940093,5609679,5609217,5610183
dilithium5 (1000 executions),m4f,4275029,4210286,4329519,7977781,4882524,25936176,4185417,4184925,4185896
dilithium5 (1000 executions),m4f,4275033,4220989,4350945,8349360,4882552,29688762,4192692,4191427,4193764
dilithium5 (1000 executions),m4fstack,5816287,5474236,6115061,33452872,11170780,185259803,9912851,9845789,9981834
falcon-1024 (10 executions),clean,602066436,377135260,1488065363,136241759,136017549,136556585,1678109,1677732,1678566
falcon-1024 (10 executions),m4-ct,408725773,314885208,712370124,87706019,87549942,87839508,990541,984448,997160
Expand Down Expand Up @@ -200,13 +200,13 @@ cross-sha3-r-sdpg-1-small,ref,2328,466400,245512,,,,,,
cross-sha3-r-sdpg-3-fast,ref,4032,205080,108236,,,,,,
cross-sha3-r-sdpg-5-fast,ref,6824,398600,213436,,,,,,
dilithium2,clean,38304,51968,36192,,,,,,
dilithium2,m4f,38296,49416,36220,,,,,,
dilithium2,m4f,38296,49416,9012,,,,,,
dilithium2,m4fstack,4408,5072,2704,,,,,,
dilithium3,clean,60832,79616,57728,,,,,,
dilithium3,m4f,60824,68864,57720,,,,,,
dilithium3,m4f,60824,68864,9880,,,,,,
dilithium3,m4fstack,4408,6608,2704,,,,,,
dilithium5,clean,97696,122724,92940,,,,,,
dilithium5,m4f,97688,116076,92932,,,,,,
dilithium5,m4f,97688,116076,11944,,,,,,
dilithium5,m4fstack,4408,8136,2712,,,,,,
falcon-1024,clean,35076,84604,8776,,,,,,
falcon-1024,m4-ct,1156,2508,376,,,,,,
Expand Down Expand Up @@ -355,13 +355,13 @@ cross-sha3-r-sdpg-1-small,ref,71.8,74.7,78.4,,,,,,
cross-sha3-r-sdpg-3-fast,ref,71.7,68.2,68.7,,,,,,
cross-sha3-r-sdpg-5-fast,ref,71.1,66.1,66.8,,,,,,
dilithium2,clean,61.0,30.9,52.9,,,,,,
dilithium2,m4f,79.9,60.6,76.8,,,,,,
dilithium2,m4f,79.9,60.7,76.6,,,,,,
dilithium2,m4fstack,74.8,55.2,40.8,,,,,,
dilithium3,clean,64.7,31.3,56.8,,,,,,
dilithium3,m4f,82.3,60.3,79.4,,,,,,
dilithium3,m4f,82.3,60.7,79.2,,,,,,
dilithium3,m4fstack,77.1,54.6,41.0,,,,,,
dilithium5,clean,67.0,35.7,61.1,,,,,,
dilithium5,m4f,83.5,65.0,81.7,,,,,,
dilithium5,m4f,83.5,65.3,81.6,,,,,,
dilithium5,m4fstack,76.1,54.5,42.6,,,,,,
falcon-1024,clean,8.9,0.3,23.7,,,,,,
falcon-1024,m4-ct,8.6,0.4,32.2,,,,,,
Expand Down Expand Up @@ -509,13 +509,13 @@ cross-sha3-r-sdpg-1-small,ref,18846,0,208,19054,,,,,
cross-sha3-r-sdpg-3-fast,ref,19689,0,208,19897,,,,,
cross-sha3-r-sdpg-5-fast,ref,18593,0,208,18801,,,,,
dilithium2,clean,8064,0,0,8064,,,,,
dilithium2,m4f,18596,0,0,18596,,,,,
dilithium2,m4f,19180,0,0,19180,,,,,
dilithium2,m4fstack,24184,0,0,24184,,,,,
dilithium3,clean,7580,0,0,7580,,,,,
dilithium3,m4f,18588,0,0,18588,,,,,
dilithium3,m4f,19188,0,0,19188,,,,,
dilithium3,m4fstack,23448,0,0,23448,,,,,
dilithium5,clean,7808,0,0,7808,,,,,
dilithium5,m4f,18468,0,0,18468,,,,,
dilithium5,m4f,19096,0,0,19096,,,,,
dilithium5,m4fstack,23820,0,0,23820,,,,,
falcon-1024,clean,82703,0,0,82703,,,,,
falcon-1024,m4-ct,81825,0,79872,161697,,,,,
Expand Down
24 changes: 12 additions & 12 deletions benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@
| cross-sha3-r-sdpg-3-fast (10 executions) | ref | AVG: 627,948 <br /> MIN: 625,525 <br /> MAX: 637,639 | AVG: 43,573,841 <br /> MIN: 43,565,461 <br /> MAX: 43,582,933 | AVG: 27,513,830 <br /> MIN: 27,493,024 <br /> MAX: 27,525,746 |
| cross-sha3-r-sdpg-5-fast (10 executions) | ref | AVG: 1,146,280 <br /> MIN: 1,142,409 <br /> MAX: 1,153,794 | AVG: 93,557,878 <br /> MIN: 93,547,167 <br /> MAX: 93,566,329 | AVG: 59,948,216 <br /> MIN: 59,857,434 <br /> MAX: 60,043,852 |
| dilithium2 (1000 executions) | clean | AVG: 1,874,167 <br /> MIN: 1,827,645 <br /> MAX: 1,914,566 | AVG: 7,493,877 <br /> MIN: 3,321,630 <br /> MAX: 40,762,756 | AVG: 2,062,795 <br /> MIN: 2,062,255 <br /> MAX: 2,063,222 |
| dilithium2 (1000 executions) | m4f | AVG: 1,426,036 <br /> MIN: 1,379,636 <br /> MAX: 1,466,394 | AVG: 3,807,970 <br /> MIN: 1,813,656 <br /> MAX: 18,528,070 | AVG: 1,417,745 <br /> MIN: 1,417,203 <br /> MAX: 1,418,192 |
| dilithium2 (1000 executions) | m4f | AVG: 1,425,723 <br /> MIN: 1,379,410 <br /> MAX: 1,466,445 | AVG: 3,835,095 <br /> MIN: 1,813,682 <br /> MAX: 16,068,642 | AVG: 1,421,307 <br /> MIN: 1,420,219 <br /> MAX: 1,422,056 |
| dilithium2 (1000 executions) | m4fstack | AVG: 1,801,523 <br /> MIN: 1,684,895 <br /> MAX: 1,902,114 | AVG: 12,170,976 <br /> MIN: 3,900,911 <br /> MAX: 86,281,518 | AVG: 3,241,353 <br /> MIN: 3,194,028 <br /> MAX: 3,281,144 |
| dilithium3 (1000 executions) | clean | AVG: 3,205,551 <br /> MIN: 3,204,090 <br /> MAX: 3,207,411 | AVG: 12,696,585 <br /> MIN: 5,097,364 <br /> MAX: 74,392,293 | AVG: 3,376,992 <br /> MIN: 3,376,581 <br /> MAX: 3,377,393 |
| dilithium3 (1000 executions) | m4f | AVG: 2,515,969 <br /> MIN: 2,514,498 <br /> MAX: 2,517,634 | AVG: 5,884,832 <br /> MIN: 2,917,322 <br /> MAX: 25,268,693 | AVG: 2,411,257 <br /> MIN: 2,410,858 <br /> MAX: 2,411,717 |
| dilithium3 (1000 executions) | m4f | AVG: 2,515,915 <br /> MIN: 2,514,307 <br /> MAX: 2,517,413 | AVG: 6,054,094 <br /> MIN: 2,917,316 <br /> MAX: 27,829,552 | AVG: 2,415,526 <br /> MIN: 2,414,696 <br /> MAX: 2,416,440 |
| dilithium3 (1000 executions) | m4fstack | AVG: 3,412,759 <br /> MIN: 3,406,659 <br /> MAX: 3,419,247 | AVG: 23,673,016 <br /> MIN: 6,733,971 <br /> MAX: 145,803,146 | AVG: 5,733,307 <br /> MIN: 5,688,893 <br /> MAX: 5,778,120 |
| dilithium5 (1000 executions) | clean | AVG: 5,341,477 <br /> MIN: 5,286,872 <br /> MAX: 5,395,822 | AVG: 15,710,371 <br /> MIN: 7,953,367 <br /> MAX: 75,940,093 | AVG: 5,609,679 <br /> MIN: 5,609,217 <br /> MAX: 5,610,183 |
| dilithium5 (1000 executions) | m4f | AVG: 4,275,029 <br /> MIN: 4,210,286 <br /> MAX: 4,329,519 | AVG: 7,977,781 <br /> MIN: 4,882,524 <br /> MAX: 25,936,176 | AVG: 4,185,417 <br /> MIN: 4,184,925 <br /> MAX: 4,185,896 |
| dilithium5 (1000 executions) | m4f | AVG: 4,275,033 <br /> MIN: 4,220,989 <br /> MAX: 4,350,945 | AVG: 8,349,360 <br /> MIN: 4,882,552 <br /> MAX: 29,688,762 | AVG: 4,192,692 <br /> MIN: 4,191,427 <br /> MAX: 4,193,764 |
| dilithium5 (1000 executions) | m4fstack | AVG: 5,816,287 <br /> MIN: 5,474,236 <br /> MAX: 6,115,061 | AVG: 33,452,872 <br /> MIN: 11,170,780 <br /> MAX: 185,259,803 | AVG: 9,912,851 <br /> MIN: 9,845,789 <br /> MAX: 9,981,834 |
| falcon-1024 (10 executions) | clean | AVG: 602,066,436 <br /> MIN: 377,135,260 <br /> MAX: 1,488,065,363 | AVG: 136,241,759 <br /> MIN: 136,017,549 <br /> MAX: 136,556,585 | AVG: 1,678,109 <br /> MIN: 1,677,732 <br /> MAX: 1,678,566 |
| falcon-1024 (10 executions) | m4-ct | AVG: 408,725,773 <br /> MIN: 314,885,208 <br /> MAX: 712,370,124 | AVG: 87,706,019 <br /> MIN: 87,549,942 <br /> MAX: 87,839,508 | AVG: 990,541 <br /> MIN: 984,448 <br /> MAX: 997,160 |
Expand Down Expand Up @@ -204,13 +204,13 @@
| cross-sha3-r-sdpg-3-fast | ref | 4,032 | 205,080 | 108,236 |
| cross-sha3-r-sdpg-5-fast | ref | 6,824 | 398,600 | 213,436 |
| dilithium2 | clean | 38,304 | 51,968 | 36,192 |
| dilithium2 | m4f | 38,296 | 49,416 | 36,220 |
| dilithium2 | m4f | 38,296 | 49,416 | 9,012 |
| dilithium2 | m4fstack | 4,408 | 5,072 | 2,704 |
| dilithium3 | clean | 60,832 | 79,616 | 57,728 |
| dilithium3 | m4f | 60,824 | 68,864 | 57,720 |
| dilithium3 | m4f | 60,824 | 68,864 | 9,880 |
| dilithium3 | m4fstack | 4,408 | 6,608 | 2,704 |
| dilithium5 | clean | 97,696 | 122,724 | 92,940 |
| dilithium5 | m4f | 97,688 | 116,076 | 92,932 |
| dilithium5 | m4f | 97,688 | 116,076 | 11,944 |
| dilithium5 | m4fstack | 4,408 | 8,136 | 2,712 |
| falcon-1024 | clean | 35,076 | 84,604 | 8,776 |
| falcon-1024 | m4-ct | 1,156 | 2,508 | 376 |
Expand Down Expand Up @@ -361,13 +361,13 @@
| cross-sha3-r-sdpg-3-fast | ref | 71.7% | 68.2% | 68.7% |
| cross-sha3-r-sdpg-5-fast | ref | 71.1% | 66.1% | 66.8% |
| dilithium2 | clean | 61.0% | 30.9% | 52.9% |
| dilithium2 | m4f | 79.9% | 60.6% | 76.8% |
| dilithium2 | m4f | 79.9% | 60.7% | 76.6% |
| dilithium2 | m4fstack | 74.8% | 55.2% | 40.8% |
| dilithium3 | clean | 64.7% | 31.3% | 56.8% |
| dilithium3 | m4f | 82.3% | 61.4% | 79.4% |
| dilithium3 | m4f | 82.3% | 60.7% | 79.2% |
| dilithium3 | m4fstack | 77.1% | 54.6% | 41.0% |
| dilithium5 | clean | 67.0% | 35.7% | 61.1% |
| dilithium5 | m4f | 83.5% | 65.0% | 81.7% |
| dilithium5 | m4f | 83.5% | 65.3% | 81.6% |
| dilithium5 | m4fstack | 76.1% | 54.5% | 42.6% |
| falcon-1024 | clean | 8.9% | 0.3% | 23.7% |
| falcon-1024 | m4-ct | 8.6% | 0.4% | 32.2% |
Expand Down Expand Up @@ -517,13 +517,13 @@
| cross-sha3-r-sdpg-3-fast | ref | 19,689 | 0 | 208 | 19,897 |
| cross-sha3-r-sdpg-5-fast | ref | 18,593 | 0 | 208 | 18,801 |
| dilithium2 | clean | 8,064 | 0 | 0 | 8,064 |
| dilithium2 | m4f | 18,596 | 0 | 0 | 18,596 |
| dilithium2 | m4f | 19,180 | 0 | 0 | 19,180 |
| dilithium2 | m4fstack | 24,184 | 0 | 0 | 24,184 |
| dilithium3 | clean | 7,580 | 0 | 0 | 7,580 |
| dilithium3 | m4f | 18,588 | 0 | 0 | 18,588 |
| dilithium3 | m4f | 19,188 | 0 | 0 | 19,188 |
| dilithium3 | m4fstack | 23,448 | 0 | 0 | 23,448 |
| dilithium5 | clean | 7,808 | 0 | 0 | 7,808 |
| dilithium5 | m4f | 18,468 | 0 | 0 | 18,468 |
| dilithium5 | m4f | 19,096 | 0 | 0 | 19,096 |
| dilithium5 | m4fstack | 23,820 | 0 | 0 | 23,820 |
| falcon-1024 | clean | 82,703 | 0 | 0 | 82,703 |
| falcon-1024 | m4-ct | 81,825 | 0 | 79,872 | 161,697 |
Expand Down
106 changes: 105 additions & 1 deletion crypto_sign/dilithium2/m4f/packing.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "packing.h"
#include "polyvec.h"
#include "poly.h"
#include <stddef.h>

/*************************************************
* Name: pack_pk
Expand Down Expand Up @@ -49,6 +50,21 @@ void unpack_pk(uint8_t rho[SEEDBYTES],
polyt1_unpack(&t1->vec[i], pk + i*POLYT1_PACKEDBYTES);
}

/*************************************************
* Name:        unpack_pk_t1
*
* Description: Unpack a single polynomial of t1 from the public key
*              pk = (rho, t1). The leading SEEDBYTES bytes (rho) are
*              skipped; only the idx-th packed t1 polynomial is decoded.
*              No bounds check is performed on idx.
*
* Arguments:   - poly *t1: pointer to output polynomial
*              - size_t idx: index of the t1 polynomial to unpack
*              - const unsigned char pk[]: byte array containing bit-packed pk
**************************************************/
void unpack_pk_t1(poly *t1, size_t idx, const unsigned char pk[CRYPTO_PUBLICKEYBYTES]) {
    /* Start of the requested polynomial inside the packed t1 region. */
    const unsigned char *packed_t1 = pk + SEEDBYTES + idx * POLYT1_PACKEDBYTES;

    polyt1_unpack(t1, packed_t1);
}


/*************************************************
* Name: pack_sk
*
Expand Down Expand Up @@ -283,4 +299,92 @@ int unpack_sig(uint8_t c[CTILDEBYTES],
return 1;

return 0;
}
}

/*************************************************
* Name:        unpack_sig_c
*
* Description: Copy only the challenge seed c from signature
*              sig = (c, z, h). c occupies the first CTILDEBYTES
*              bytes of the signature.
*
* Arguments:   - uint8_t c[]: output buffer of CTILDEBYTES bytes
*              - const unsigned char sig[]: byte array containing
*                bit-packed signature
*
* Returns 0 always; no validity check is performed on c here.
**************************************************/
int unpack_sig_c(uint8_t c[CTILDEBYTES], const unsigned char sig[CRYPTO_BYTES]) {
    for (size_t i = 0; i < CTILDEBYTES; ++i) {
        c[i] = sig[i];
    }
    /* The former trailing `sig += CTILDEBYTES;` was a dead store on a
     * by-value parameter and has been removed. */
    return 0;
}

/*************************************************
* Name:        unpack_sig_z
*
* Description: Unpack only the vector z from signature sig = (c, z, h).
*              z starts CTILDEBYTES bytes into the signature and consists
*              of L polynomials of POLYZ_PACKEDBYTES bytes each.
*
* Arguments:   - polyvecl *z: pointer to output vector z
*              - const unsigned char sig[]: byte array containing
*                bit-packed signature
*
* Returns 0 always; this function performs no validity checks.
**************************************************/
int unpack_sig_z(polyvecl *z, const unsigned char sig[CRYPTO_BYTES]) {
    /* Walk the packed z region one polynomial at a time. */
    const unsigned char *packed_z = sig + CTILDEBYTES;

    for (size_t row = 0; row < L; ++row) {
        polyz_unpack(&z->vec[row], packed_z);
        packed_z += POLYZ_PACKEDBYTES;
    }

    return 0;
}

/*************************************************
* Name:        unpack_sig_h
*
* Description: Unpack a single polynomial of the hint vector h from
*              signature sig = (c, z, h), while validating the encoding
*              of the complete hint (all K rows), not just row idx.
*
* Arguments:   - poly *h: pointer to output hint polynomial; it is cleared
*                and filled only once row idx is reached, so its contents
*                must not be used when 1 is returned or when idx >= K
*              - size_t idx: index of the hint polynomial to extract
*              - const unsigned char sig[]: byte array containing
*                bit-packed signature
*
* Returns 1 in case of malformed signature; otherwise 0.
**************************************************/
int unpack_sig_h(poly *h, size_t idx, const unsigned char sig[CRYPTO_BYTES]) {
    /* Skip c and z; sig then points at the OMEGA index bytes followed by
     * the K per-row end offsets. */
    sig += CTILDEBYTES;
    sig += L * POLYZ_PACKEDBYTES;

    /* Decode h */
    size_t k = 0;
    for (size_t i = 0; i < K; ++i) {
        /* Loop-invariant for everything below: decide once per row. */
        const int selected = (i == idx);

        if (selected) {
            for (size_t j = 0; j < N; ++j) {
                h->coeffs[j] = 0;
            }
        }

        /* Row end offsets must be non-decreasing and at most OMEGA. */
        if (sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) {
            return 1;
        }

        for (size_t j = k; j < sig[OMEGA + i]; ++j) {
            /* Coefficients are ordered for strong unforgeability */
            if (j > k && sig[j] <= sig[j - 1]) {
                return 1;
            }
            if (selected) {
                h->coeffs[sig[j]] = 1;
            }
        }

        k = sig[OMEGA + i];
    }

    /* Extra indices are zero for strong unforgeability */
    for (size_t j = k; j < OMEGA; ++j) {
        if (sig[j]) {
            return 1;
        }
    }
    return 0;
}

13 changes: 13 additions & 0 deletions crypto_sign/dilithium2/m4f/packing.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define PACKING_H

#include <stdint.h>
#include <stddef.h>
#include "params.h"
#include "polyvec.h"
#include "smallpoly.h"
Expand All @@ -24,6 +25,9 @@ void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES], const pol
#define unpack_pk DILITHIUM_NAMESPACE(unpack_pk)
void unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t pk[CRYPTO_PUBLICKEYBYTES]);

/* Unpacks only the t1 polynomial at index idx from the packed public key pk
 * (the leading rho seed is skipped). */
#define unpack_pk_t1 DILITHIUM_NAMESPACE(unpack_pk_t1)
void unpack_pk_t1(poly *t1, size_t idx, const unsigned char pk[CRYPTO_PUBLICKEYBYTES]);

#define unpack_sk DILITHIUM_NAMESPACE(unpack_sk)
void unpack_sk(uint8_t rho[SEEDBYTES],
uint8_t tr[TRBYTES],
Expand All @@ -36,6 +40,15 @@ void unpack_sk(uint8_t rho[SEEDBYTES],
#define unpack_sig DILITHIUM_NAMESPACE(unpack_sig)
int unpack_sig(uint8_t c[CTILDEBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]);


/* Partial signature unpacking: each function extracts one component of the
 * packed signature so that callers need not hold all of (c, z, h) at once. */

/* Unpacks the vector z; always returns 0. */
#define unpack_sig_z DILITHIUM_NAMESPACE(unpack_sig_z)
int unpack_sig_z(polyvecl *z, const unsigned char sig[CRYPTO_BYTES]);
/* Unpacks the hint polynomial at index idx while validating the whole hint
 * encoding; returns 1 if the encoding is malformed, otherwise 0. */
#define unpack_sig_h DILITHIUM_NAMESPACE(unpack_sig_h)
int unpack_sig_h(poly *h, size_t idx, const unsigned char sig[CRYPTO_BYTES]);
/* Copies the first CTILDEBYTES bytes of sig into c; always returns 0. */
#define unpack_sig_c DILITHIUM_NAMESPACE(unpack_sig_c)
int unpack_sig_c(uint8_t c[CTILDEBYTES], const unsigned char sig[CRYPTO_BYTES]);


#define pack_sig_c DILITHIUM_NAMESPACE(pack_sig_c)
void pack_sig_c(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES]);

Expand Down
12 changes: 12 additions & 0 deletions crypto_sign/dilithium2/m4f/poly.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,18 @@ void poly_caddq(poly *a) {
asm_caddq(a->coeffs);
}

/*************************************************
* Name:        poly_csubq
*
* Description: For all coefficients of input polynomial subtract Q if
*              coefficient is bigger than Q; add Q if coefficient is negative.
*
*              NOTE(review): the body is identical to poly_caddq - it
*              forwards to asm_caddq. Whether asm_caddq also performs the
*              conditional subtraction described above cannot be confirmed
*              from this file; verify against the assembly routine.
*
* Arguments:   - poly *a: pointer to input/output polynomial
**************************************************/
void poly_csubq(poly *a) {
asm_caddq(a->coeffs);
}

#if 0
/*************************************************
* Name: poly_freeze
Expand Down
2 changes: 2 additions & 0 deletions crypto_sign/dilithium2/m4f/poly.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ typedef struct {
void poly_reduce(poly *a);
#define poly_caddq DILITHIUM_NAMESPACE(poly_caddq)
void poly_caddq(poly *a);
/* In-place conditional reduction of the coefficients of a; the implementation
 * in poly.c forwards to asm_caddq. */
#define poly_csubq DILITHIUM_NAMESPACE(poly_csubq)
void poly_csubq(poly *a);
#define poly_freeze DILITHIUM_NAMESPACE(poly_freeze)
void poly_freeze(poly *a);

Expand Down
Loading

0 comments on commit f2b698a

Please sign in to comment.