Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve Dilithium (speed) verification stack usage #346

Merged
merged 2 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 12 additions & 12 deletions benchmarks.csv
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ cross-sha3-r-sdpg-1-small (10 executions),ref,290135,287741,297757,102853622,102
cross-sha3-r-sdpg-3-fast (10 executions),ref,627948,625525,637639,43573841,43565461,43582933,27513830,27493024,27525746
cross-sha3-r-sdpg-5-fast (10 executions),ref,1146280,1142409,1153794,93557878,93547167,93566329,59948216,59857434,60043852
dilithium2 (1000 executions),clean,1874167,1827645,1914566,7493877,3321630,40762756,2062795,2062255,2063222
dilithium2 (1000 executions),m4f,1426036,1379636,1466394,3807970,1813656,18528070,1417745,1417203,1418192
dilithium2 (1000 executions),m4f,1425723,1379410,1466445,3835095,1813682,16068642,1421307,1420219,1422056
dilithium2 (1000 executions),m4fstack,1801523,1684895,1902114,12170976,3900911,86281518,3241353,3194028,3281144
dilithium3 (1000 executions),clean,3205551,3204090,3207411,12696585,5097364,74392293,3376992,3376581,3377393
dilithium3 (1000 executions),m4f,2515969,2514498,2517634,5884832,2917322,25268693,2411257,2410858,2411717
dilithium3 (1000 executions),m4f,2515915,2514307,2517413,6054094,2917316,27829552,2415526,2414696,2416440
dilithium3 (1000 executions),m4fstack,3412759,3406659,3419247,23673016,6733971,145803146,5733307,5688893,5778120
dilithium5 (1000 executions),clean,5341477,5286872,5395822,15710371,7953367,75940093,5609679,5609217,5610183
dilithium5 (1000 executions),m4f,4275029,4210286,4329519,7977781,4882524,25936176,4185417,4184925,4185896
dilithium5 (1000 executions),m4f,4275033,4220989,4350945,8349360,4882552,29688762,4192692,4191427,4193764
dilithium5 (1000 executions),m4fstack,5816287,5474236,6115061,33452872,11170780,185259803,9912851,9845789,9981834
falcon-1024 (10 executions),m4-ct,354880005,284902033,635131652,87741288,87506676,87922628,991320,982548,997219
falcon-1024 (10 executions),opt-ct,555202324,284912829,1157528581,87710190,87606677,87841235,993584,983066,997523
Expand Down Expand Up @@ -196,13 +196,13 @@ cross-sha3-r-sdpg-1-small,ref,2328,466400,245512,,,,,,
cross-sha3-r-sdpg-3-fast,ref,4032,205080,108236,,,,,,
cross-sha3-r-sdpg-5-fast,ref,6824,398600,213436,,,,,,
dilithium2,clean,38304,51968,36192,,,,,,
dilithium2,m4f,38296,49416,36220,,,,,,
dilithium2,m4f,38296,49416,9012,,,,,,
dilithium2,m4fstack,4408,5072,2704,,,,,,
dilithium3,clean,60832,79616,57728,,,,,,
dilithium3,m4f,60824,68864,57720,,,,,,
dilithium3,m4f,60824,68864,9880,,,,,,
dilithium3,m4fstack,4408,6608,2704,,,,,,
dilithium5,clean,97696,122724,92940,,,,,,
dilithium5,m4f,97688,116076,92932,,,,,,
dilithium5,m4f,97688,116076,11944,,,,,,
dilithium5,m4fstack,4408,8136,2712,,,,,,
falcon-1024,clean,34988,84604,8784,,,,,,
falcon-1024,m4-ct,1156,2508,376,,,,,,
Expand Down Expand Up @@ -349,13 +349,13 @@ cross-sha3-r-sdpg-1-small,ref,71.8,74.7,78.4,,,,,,
cross-sha3-r-sdpg-3-fast,ref,71.7,68.2,68.7,,,,,,
cross-sha3-r-sdpg-5-fast,ref,71.1,66.1,66.8,,,,,,
dilithium2,clean,61.0,30.9,52.9,,,,,,
dilithium2,m4f,79.9,60.6,76.8,,,,,,
dilithium2,m4f,79.9,60.7,76.6,,,,,,
dilithium2,m4fstack,74.8,55.2,40.8,,,,,,
dilithium3,clean,64.7,31.3,56.8,,,,,,
dilithium3,m4f,82.3,60.3,79.4,,,,,,
dilithium3,m4f,82.3,60.7,79.2,,,,,,
dilithium3,m4fstack,77.1,54.6,41.0,,,,,,
dilithium5,clean,67.0,35.7,61.1,,,,,,
dilithium5,m4f,83.5,65.0,81.7,,,,,,
dilithium5,m4f,83.5,65.3,81.6,,,,,,
dilithium5,m4fstack,76.1,54.5,42.6,,,,,,
falcon-1024,clean,6.5,0.3,23.7,,,,,,
falcon-1024,m4-ct,7.4,0.4,32.4,,,,,,
Expand Down Expand Up @@ -501,13 +501,13 @@ cross-sha3-r-sdpg-1-small,ref,18846,0,208,19054,,,,,
cross-sha3-r-sdpg-3-fast,ref,19689,0,208,19897,,,,,
cross-sha3-r-sdpg-5-fast,ref,18593,0,208,18801,,,,,
dilithium2,clean,8064,0,0,8064,,,,,
dilithium2,m4f,18596,0,0,18596,,,,,
dilithium2,m4f,19180,0,0,19180,,,,,
dilithium2,m4fstack,24184,0,0,24184,,,,,
dilithium3,clean,7580,0,0,7580,,,,,
dilithium3,m4f,18588,0,0,18588,,,,,
dilithium3,m4f,19188,0,0,19188,,,,,
dilithium3,m4fstack,23448,0,0,23448,,,,,
dilithium5,clean,7808,0,0,7808,,,,,
dilithium5,m4f,18468,0,0,18468,,,,,
dilithium5,m4f,19096,0,0,19096,,,,,
dilithium5,m4fstack,23820,0,0,23820,,,,,
falcon-1024,clean,82647,0,0,82647,,,,,
falcon-1024,m4-ct,81825,0,79872,161697,,,,,
Expand Down
24 changes: 12 additions & 12 deletions benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@
| cross-sha3-r-sdpg-3-fast (10 executions) | ref | AVG: 627,948 <br /> MIN: 625,525 <br /> MAX: 637,639 | AVG: 43,573,841 <br /> MIN: 43,565,461 <br /> MAX: 43,582,933 | AVG: 27,513,830 <br /> MIN: 27,493,024 <br /> MAX: 27,525,746 |
| cross-sha3-r-sdpg-5-fast (10 executions) | ref | AVG: 1,146,280 <br /> MIN: 1,142,409 <br /> MAX: 1,153,794 | AVG: 93,557,878 <br /> MIN: 93,547,167 <br /> MAX: 93,566,329 | AVG: 59,948,216 <br /> MIN: 59,857,434 <br /> MAX: 60,043,852 |
| dilithium2 (1000 executions) | clean | AVG: 1,874,167 <br /> MIN: 1,827,645 <br /> MAX: 1,914,566 | AVG: 7,493,877 <br /> MIN: 3,321,630 <br /> MAX: 40,762,756 | AVG: 2,062,795 <br /> MIN: 2,062,255 <br /> MAX: 2,063,222 |
| dilithium2 (1000 executions) | m4f | AVG: 1,426,036 <br /> MIN: 1,379,636 <br /> MAX: 1,466,394 | AVG: 3,807,970 <br /> MIN: 1,813,656 <br /> MAX: 18,528,070 | AVG: 1,417,745 <br /> MIN: 1,417,203 <br /> MAX: 1,418,192 |
| dilithium2 (1000 executions) | m4f | AVG: 1,425,723 <br /> MIN: 1,379,410 <br /> MAX: 1,466,445 | AVG: 3,835,095 <br /> MIN: 1,813,682 <br /> MAX: 16,068,642 | AVG: 1,421,307 <br /> MIN: 1,420,219 <br /> MAX: 1,422,056 |
| dilithium2 (1000 executions) | m4fstack | AVG: 1,801,523 <br /> MIN: 1,684,895 <br /> MAX: 1,902,114 | AVG: 12,170,976 <br /> MIN: 3,900,911 <br /> MAX: 86,281,518 | AVG: 3,241,353 <br /> MIN: 3,194,028 <br /> MAX: 3,281,144 |
| dilithium3 (1000 executions) | clean | AVG: 3,205,551 <br /> MIN: 3,204,090 <br /> MAX: 3,207,411 | AVG: 12,696,585 <br /> MIN: 5,097,364 <br /> MAX: 74,392,293 | AVG: 3,376,992 <br /> MIN: 3,376,581 <br /> MAX: 3,377,393 |
| dilithium3 (1000 executions) | m4f | AVG: 2,515,969 <br /> MIN: 2,514,498 <br /> MAX: 2,517,634 | AVG: 5,884,832 <br /> MIN: 2,917,322 <br /> MAX: 25,268,693 | AVG: 2,411,257 <br /> MIN: 2,410,858 <br /> MAX: 2,411,717 |
| dilithium3 (1000 executions) | m4f | AVG: 2,515,915 <br /> MIN: 2,514,307 <br /> MAX: 2,517,413 | AVG: 6,054,094 <br /> MIN: 2,917,316 <br /> MAX: 27,829,552 | AVG: 2,415,526 <br /> MIN: 2,414,696 <br /> MAX: 2,416,440 |
| dilithium3 (1000 executions) | m4fstack | AVG: 3,412,759 <br /> MIN: 3,406,659 <br /> MAX: 3,419,247 | AVG: 23,673,016 <br /> MIN: 6,733,971 <br /> MAX: 145,803,146 | AVG: 5,733,307 <br /> MIN: 5,688,893 <br /> MAX: 5,778,120 |
| dilithium5 (1000 executions) | clean | AVG: 5,341,477 <br /> MIN: 5,286,872 <br /> MAX: 5,395,822 | AVG: 15,710,371 <br /> MIN: 7,953,367 <br /> MAX: 75,940,093 | AVG: 5,609,679 <br /> MIN: 5,609,217 <br /> MAX: 5,610,183 |
| dilithium5 (1000 executions) | m4f | AVG: 4,275,029 <br /> MIN: 4,210,286 <br /> MAX: 4,329,519 | AVG: 7,977,781 <br /> MIN: 4,882,524 <br /> MAX: 25,936,176 | AVG: 4,185,417 <br /> MIN: 4,184,925 <br /> MAX: 4,185,896 |
| dilithium5 (1000 executions) | m4f | AVG: 4,275,033 <br /> MIN: 4,220,989 <br /> MAX: 4,350,945 | AVG: 8,349,360 <br /> MIN: 4,882,552 <br /> MAX: 29,688,762 | AVG: 4,192,692 <br /> MIN: 4,191,427 <br /> MAX: 4,193,764 |
| dilithium5 (1000 executions) | m4fstack | AVG: 5,816,287 <br /> MIN: 5,474,236 <br /> MAX: 6,115,061 | AVG: 33,452,872 <br /> MIN: 11,170,780 <br /> MAX: 185,259,803 | AVG: 9,912,851 <br /> MIN: 9,845,789 <br /> MAX: 9,981,834 |
| falcon-1024 (10 executions) | m4-ct | AVG: 354,880,005 <br /> MIN: 284,902,033 <br /> MAX: 635,131,652 | AVG: 87,741,288 <br /> MIN: 87,506,676 <br /> MAX: 87,922,628 | AVG: 991,320 <br /> MIN: 982,548 <br /> MAX: 997,219 |
| falcon-1024 (10 executions) | opt-ct | AVG: 555,202,324 <br /> MIN: 284,912,829 <br /> MAX: 1,157,528,581 | AVG: 87,710,190 <br /> MIN: 87,606,677 <br /> MAX: 87,841,235 | AVG: 993,584 <br /> MIN: 983,066 <br /> MAX: 997,523 |
Expand Down Expand Up @@ -200,13 +200,13 @@
| cross-sha3-r-sdpg-3-fast | ref | 4,032 | 205,080 | 108,236 |
| cross-sha3-r-sdpg-5-fast | ref | 6,824 | 398,600 | 213,436 |
| dilithium2 | clean | 38,304 | 51,968 | 36,192 |
| dilithium2 | m4f | 38,296 | 49,416 | 36,220 |
| dilithium2 | m4f | 38,296 | 49,416 | 9,012 |
| dilithium2 | m4fstack | 4,408 | 5,072 | 2,704 |
| dilithium3 | clean | 60,832 | 79,616 | 57,728 |
| dilithium3 | m4f | 60,824 | 68,864 | 57,720 |
| dilithium3 | m4f | 60,824 | 68,864 | 9,880 |
| dilithium3 | m4fstack | 4,408 | 6,608 | 2,704 |
| dilithium5 | clean | 97,696 | 122,724 | 92,940 |
| dilithium5 | m4f | 97,688 | 116,076 | 92,932 |
| dilithium5 | m4f | 97,688 | 116,076 | 11,944 |
| dilithium5 | m4fstack | 4,408 | 8,136 | 2,712 |
| falcon-1024 | clean | 34,988 | 84,604 | 8,784 |
| falcon-1024 | m4-ct | 1,156 | 2,508 | 376 |
Expand Down Expand Up @@ -355,13 +355,13 @@
| cross-sha3-r-sdpg-3-fast | ref | 71.7% | 68.2% | 68.7% |
| cross-sha3-r-sdpg-5-fast | ref | 71.1% | 66.1% | 66.8% |
| dilithium2 | clean | 61.0% | 30.9% | 52.9% |
| dilithium2 | m4f | 79.9% | 60.6% | 76.8% |
| dilithium2 | m4f | 79.9% | 60.7% | 76.6% |
| dilithium2 | m4fstack | 74.8% | 55.2% | 40.8% |
| dilithium3 | clean | 64.7% | 31.3% | 56.8% |
| dilithium3 | m4f | 82.3% | 61.4% | 79.4% |
| dilithium3 | m4f | 82.3% | 60.7% | 79.2% |
| dilithium3 | m4fstack | 77.1% | 54.6% | 41.0% |
| dilithium5 | clean | 67.0% | 35.7% | 61.1% |
| dilithium5 | m4f | 83.5% | 65.0% | 81.7% |
| dilithium5 | m4f | 83.5% | 65.3% | 81.6% |
| dilithium5 | m4fstack | 76.1% | 54.5% | 42.6% |
| falcon-1024 | clean | 6.5% | 0.3% | 23.7% |
| falcon-1024 | m4-ct | 7.4% | 0.4% | 32.4% |
Expand Down Expand Up @@ -509,13 +509,13 @@
| cross-sha3-r-sdpg-3-fast | ref | 19,689 | 0 | 208 | 19,897 |
| cross-sha3-r-sdpg-5-fast | ref | 18,593 | 0 | 208 | 18,801 |
| dilithium2 | clean | 8,064 | 0 | 0 | 8,064 |
| dilithium2 | m4f | 18,596 | 0 | 0 | 18,596 |
| dilithium2 | m4f | 19,180 | 0 | 0 | 19,180 |
| dilithium2 | m4fstack | 24,184 | 0 | 0 | 24,184 |
| dilithium3 | clean | 7,580 | 0 | 0 | 7,580 |
| dilithium3 | m4f | 18,588 | 0 | 0 | 18,588 |
| dilithium3 | m4f | 19,188 | 0 | 0 | 19,188 |
| dilithium3 | m4fstack | 23,448 | 0 | 0 | 23,448 |
| dilithium5 | clean | 7,808 | 0 | 0 | 7,808 |
| dilithium5 | m4f | 18,468 | 0 | 0 | 18,468 |
| dilithium5 | m4f | 19,096 | 0 | 0 | 19,096 |
| dilithium5 | m4fstack | 23,820 | 0 | 0 | 23,820 |
| falcon-1024 | clean | 82,647 | 0 | 0 | 82,647 |
| falcon-1024 | m4-ct | 81,825 | 0 | 79,872 | 161,697 |
Expand Down
106 changes: 105 additions & 1 deletion crypto_sign/dilithium2/m4f/packing.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "packing.h"
#include "polyvec.h"
#include "poly.h"
#include <stddef.h>

/*************************************************
* Name: pack_pk
Expand Down Expand Up @@ -49,6 +50,21 @@ void unpack_pk(uint8_t rho[SEEDBYTES],
polyt1_unpack(&t1->vec[i], pk + i*POLYT1_PACKEDBYTES);
}

/*************************************************
* Name:        unpack_pk_t1
*
* Description: Unpack a single polynomial of t1 from the public key
*              pk = (rho, t1). The leading SEEDBYTES bytes (rho) are
*              skipped, then the idx-th POLYT1_PACKEDBYTES-sized chunk
*              is handed to polyt1_unpack.
*
* Arguments:   - poly *t1: pointer to output polynomial
*              - size_t idx: index of the t1 polynomial to unpack
*                (not range-checked here)
*              - const unsigned char pk[]: byte array containing
*                bit-packed pk
**************************************************/
void unpack_pk_t1(poly *t1, size_t idx, const unsigned char pk[CRYPTO_PUBLICKEYBYTES]) {
    const unsigned char *packed_t1 = pk + SEEDBYTES + idx * POLYT1_PACKEDBYTES;

    polyt1_unpack(t1, packed_t1);
}


/*************************************************
* Name: pack_sk
*
Expand Down Expand Up @@ -283,4 +299,92 @@ int unpack_sig(uint8_t c[CTILDEBYTES],
return 1;

return 0;
}
}

/*************************************************
* Name:        unpack_sig_c
*
* Description: Unpack only the challenge bytes c from a signature.
*              c is read from the first CTILDEBYTES bytes of sig.
*
* Arguments:   - uint8_t c[]: output byte array receiving CTILDEBYTES
*                bytes
*              - const unsigned char sig[]: byte array containing
*                bit-packed signature
*
* Returns 0 always; this function performs a plain copy and does not
* validate the signature.
**************************************************/
int unpack_sig_c(uint8_t c[CTILDEBYTES], const unsigned char sig[CRYPTO_BYTES]) {
    for (size_t i = 0; i < CTILDEBYTES; ++i) {
        c[i] = sig[i];
    }
    return 0;
}

/*************************************************
* Name:        unpack_sig_z
*
* Description: Unpack only the vector z from a signature. The packed
*              polynomials of z start CTILDEBYTES bytes into sig and
*              are POLYZ_PACKEDBYTES bytes each.
*
* Arguments:   - polyvecl *z: pointer to output vector z
*              - const unsigned char sig[]: byte array containing
*                bit-packed signature
*
* Returns 0 always; this function itself performs no validity check.
**************************************************/
int unpack_sig_z(polyvecl *z, const unsigned char sig[CRYPTO_BYTES]) {
    /* Cursor over the packed polynomials of z, placed after c */
    const unsigned char *packed_z = sig + CTILDEBYTES;

    for (size_t row = 0; row < L; ++row, packed_z += POLYZ_PACKEDBYTES) {
        polyz_unpack(&z->vec[row], packed_z);
    }
    return 0;
}

/*************************************************
* Name:        unpack_sig_h
*
* Description: Unpack a single polynomial of the hint vector h from a
*              signature. The hint section starts after the
*              CTILDEBYTES bytes of c and the L * POLYZ_PACKEDBYTES
*              bytes of z. It holds OMEGA coefficient-index bytes
*              followed by K bytes, where byte OMEGA + i is the running
*              end position of row i within the index bytes.
*              All K rows are validated; only row idx is written to h.
*
* Arguments:   - poly *h: pointer to output hint polynomial; it is
*                zeroed and then filled with 0/1 coefficients when
*                row idx is reached
*              - size_t idx: index of the row of h to unpack
*              - const unsigned char sig[]: byte array containing
*                bit-packed signature
*
* Returns 1 in case of malformed signature; otherwise 0.
* When 1 is returned, h may be untouched or only partially filled.
**************************************************/
int unpack_sig_h(poly *h, size_t idx, const unsigned char sig[CRYPTO_BYTES]) {
    sig += CTILDEBYTES;
    sig += L * POLYZ_PACKEDBYTES;

    /* Decode h */
    size_t k = 0;
    for (size_t i = 0; i < K; ++i) {
        /* Clear the output once, on reaching the requested row */
        if (i == idx) {
            for (size_t j = 0; j < N; ++j) {
                h->coeffs[j] = 0;
            }
        }

        /* Row end positions must be non-decreasing and at most OMEGA */
        if (sig[OMEGA + i] < k || sig[OMEGA + i] > OMEGA) {
            return 1;
        }

        for (size_t j = k; j < sig[OMEGA + i]; ++j) {
            /* Coefficients are ordered for strong unforgeability */
            if (j > k && sig[j] <= sig[j - 1]) {
                return 1;
            }
            if (i == idx) {
                h->coeffs[sig[j]] = 1;
            }
        }

        k = sig[OMEGA + i];
    }

    /* Extra indices are zero for strong unforgeability */
    for (size_t j = k; j < OMEGA; ++j) {
        if (sig[j]) {
            return 1;
        }
    }
    return 0;
}

13 changes: 13 additions & 0 deletions crypto_sign/dilithium2/m4f/packing.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#define PACKING_H

#include <stdint.h>
#include <stddef.h>
#include "params.h"
#include "polyvec.h"
#include "smallpoly.h"
Expand All @@ -24,6 +25,9 @@ void pack_sig(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES], const pol
#define unpack_pk DILITHIUM_NAMESPACE(unpack_pk)
void unpack_pk(uint8_t rho[SEEDBYTES], polyveck *t1, const uint8_t pk[CRYPTO_PUBLICKEYBYTES]);

/* Unpack only the idx-th polynomial of t1 from the bit-packed public key pk. */
#define unpack_pk_t1 DILITHIUM_NAMESPACE(unpack_pk_t1)
void unpack_pk_t1(poly *t1, size_t idx, const unsigned char pk[CRYPTO_PUBLICKEYBYTES]);

#define unpack_sk DILITHIUM_NAMESPACE(unpack_sk)
void unpack_sk(uint8_t rho[SEEDBYTES],
uint8_t tr[TRBYTES],
Expand All @@ -36,6 +40,15 @@ void unpack_sk(uint8_t rho[SEEDBYTES],
#define unpack_sig DILITHIUM_NAMESPACE(unpack_sig)
int unpack_sig(uint8_t c[CTILDEBYTES], polyvecl *z, polyveck *h, const uint8_t sig[CRYPTO_BYTES]);


/* Partial signature unpacking: each function extracts one component of sig. */
/* Unpack the vector z, stored after the CTILDEBYTES bytes of c; returns 0. */
#define unpack_sig_z DILITHIUM_NAMESPACE(unpack_sig_z)
int unpack_sig_z(polyvecl *z, const unsigned char sig[CRYPTO_BYTES]);
/* Validate the whole hint encoding and unpack row idx of h into a single poly;
 * returns 1 if the hint encoding is malformed, otherwise 0. */
#define unpack_sig_h DILITHIUM_NAMESPACE(unpack_sig_h)
int unpack_sig_h(poly *h, size_t idx, const unsigned char sig[CRYPTO_BYTES]);
/* Copy the first CTILDEBYTES bytes of sig into c; returns 0. */
#define unpack_sig_c DILITHIUM_NAMESPACE(unpack_sig_c)
int unpack_sig_c(uint8_t c[CTILDEBYTES], const unsigned char sig[CRYPTO_BYTES]);


#define pack_sig_c DILITHIUM_NAMESPACE(pack_sig_c)
void pack_sig_c(uint8_t sig[CRYPTO_BYTES], const uint8_t c[CTILDEBYTES]);

Expand Down
12 changes: 12 additions & 0 deletions crypto_sign/dilithium2/m4f/poly.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,18 @@ void poly_caddq(poly *a) {
asm_caddq(a->coeffs);
}

/*************************************************
* Name: poly_csubq
*
* Description: For all coefficients of input polynomial subtract Q if
* coefficient is bigger than Q; add Q if coefficient is negative.
*
* NOTE(review): the body only forwards a->coeffs to asm_caddq, the
* same routine poly_caddq calls. Whether asm_caddq
* also performs the "subtract Q" part described above
* cannot be seen from this file; confirm against the
* assembly before relying on it.
*
* Arguments: - poly *a: pointer to input/output polynomial
**************************************************/
void poly_csubq(poly *a) {
asm_caddq(a->coeffs);
}

#if 0
/*************************************************
* Name: poly_freeze
Expand Down
2 changes: 2 additions & 0 deletions crypto_sign/dilithium2/m4f/poly.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ typedef struct {
void poly_reduce(poly *a);
#define poly_caddq DILITHIUM_NAMESPACE(poly_caddq)
void poly_caddq(poly *a);
/* Passes the coefficients of a to asm_caddq, modifying a in place.
 * NOTE(review): currently the same call as poly_caddq; confirm intended. */
#define poly_csubq DILITHIUM_NAMESPACE(poly_csubq)
void poly_csubq(poly *a);
#define poly_freeze DILITHIUM_NAMESPACE(poly_freeze)
void poly_freeze(poly *a);

Expand Down
Loading
Loading