Skip to content

Commit

Permalink
Make aes_hw_ctr32_encrypt_blocks handle len=0 correctly (#1690)
Browse files Browse the repository at this point in the history
When the input length is 0 blocks, `aes_hw_ctr32_encrypt_blocks` encrypts (and writes to the output)
2 blocks in the case of AArch64 and 1 block in the case of x86_64 and x86.
(The function is guarded in the surrounding code wherever it's called by checks that `len != 0`.)
This change fixes this behaviour without degrading performance.

Tested the performance on Graviton3 and on Mac x86_64.
  • Loading branch information
nebeid authored Aug 2, 2024
1 parent b32e641 commit b929d74
Show file tree
Hide file tree
Showing 15 changed files with 90 additions and 67 deletions.
11 changes: 11 additions & 0 deletions crypto/fipsmodule/aes/aes_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,17 @@ TEST(AESTest, ABI) {
SCOPED_TRACE(blocks);
CHECK_ABI(aes_hw_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key,
block, AES_ENCRYPT);
if (blocks == 0) {
// Without this initialization, valgrind complains
// about using an uninitialized value.
for (size_t i = 0; i < 64; i++) {
buf[i] = i;
}
std::string buf_before = testing::PrintToString(Bytes(buf,64));
CHECK_ABI(aes_hw_ctr32_encrypt_blocks, buf, buf, blocks, &key, block);
EXPECT_EQ(buf_before, testing::PrintToString(Bytes(buf,64)));
}

CHECK_ABI(aes_hw_ctr32_encrypt_blocks, buf, buf, blocks, &key, block);
#if defined(HWAES_ECB)
CHECK_ABI(aes_hw_ecb_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key,
Expand Down
1 change: 1 addition & 0 deletions crypto/fipsmodule/aes/asm/aesni-x86.pl
Original file line number Diff line number Diff line change
Expand Up @@ -897,6 +897,7 @@ sub aesni_generate6
&mov (&DWP(80,"esp"),$key_);

&cmp ($len,1);
&jb (&label("ctr32_ret"));
&je (&label("ctr32_one_shortcut"));

&movdqu ($inout5,&QWP(0,$rounds_)); # load ivec
Expand Down
1 change: 1 addition & 0 deletions crypto/fipsmodule/aes/asm/aesni-x86_64.pl
Original file line number Diff line number Diff line change
Expand Up @@ -1214,6 +1214,7 @@ sub aesni_generate8 {
movb \$1,BORINGSSL_function_hit(%rip)
#endif
cmp \$1,$len
jb .Lctr32_epilogue # if $len < 1, go to done
jne .Lctr32_bulk
# handle single block without allocating stack frame,
Expand Down
17 changes: 12 additions & 5 deletions crypto/fipsmodule/aes/asm/aesv8-armx.pl
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,6 @@ ()
vld1.32 {$rndlast},[$key_]
add $key_,$key,#32
mov $cnt,$rounds
cclr $step,lo
// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
// affected by silicon errata #1742098 [0] and #1655431 [1],
Expand Down Expand Up @@ -895,11 +894,12 @@ ()
adds $len,$len,#3
b.eq .Lctr32_done
cmp $len,#1
mov $step,#16
cclr $step,eq
.Lctr32_tail:
cmp $len,#1
mov $step,#16
cclr $step,eq
b.lt .Lctr32_done // if len = 0, go to done
aese $dat0,q8
aesmc $dat0,$dat0
aese $dat1,q8
Expand Down Expand Up @@ -940,11 +940,18 @@ ()
aese $dat0,q15
aese $dat1,q15
cmp $len,#1
veor $in0,$in0,$dat0
veor $in1,$in1,$dat1
vst1.8 {$in0},[$out],#16
___
$code.=<<___ if ($flavour =~ /64/);
cbz $step,.Lctr32_done // if step = 0 (len = 1), go to done
___
$code.=<<___ if ($flavour !~ /64/);
cmp $step, #0
b.eq .Lctr32_done
___
$code.=<<___;
vst1.8 {$in1},[$out]
.Lctr32_done:
Expand Down
9 changes: 4 additions & 5 deletions generated-src/ios-aarch64/crypto/fipsmodule/aesv8-armx.S
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,6 @@ _aes_hw_ctr32_encrypt_blocks:
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo

// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
// affected by silicon errata #1742098 [0] and #1655431 [1],
Expand Down Expand Up @@ -759,11 +758,12 @@ Loop3x_ctr32:

adds x2,x2,#3
b.eq Lctr32_done

Lctr32_tail:
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq

Lctr32_tail:
b.lt Lctr32_done // if len = 0, go to done
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
Expand Down Expand Up @@ -804,11 +804,10 @@ Lctr32_tail:
aese v0.16b,v23.16b
aese v1.16b,v23.16b

cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq Lctr32_done
cbz x12,Lctr32_done // if step = 0 (len = 1), go to done
st1 {v3.16b},[x1]

Lctr32_done:
Expand Down
8 changes: 4 additions & 4 deletions generated-src/ios-arm/crypto/fipsmodule/aesv8-armx.S
Original file line number Diff line number Diff line change
Expand Up @@ -621,7 +621,6 @@ _aes_hw_ctr32_encrypt_blocks:
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
movlo r12,#0

@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
@ affected by silicon errata #1742098 [0] and #1655431 [1],
Expand Down Expand Up @@ -741,11 +740,12 @@ Loop3x_ctr32:

adds r2,r2,#3
beq Lctr32_done

Lctr32_tail:
cmp r2,#1
mov r12,#16
moveq r12,#0

Lctr32_tail:
blt Lctr32_done @ if len = 0, go to done
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
Expand Down Expand Up @@ -786,10 +786,10 @@ Lctr32_tail:
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15

cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
cmp r12, #0
beq Lctr32_done
vst1.8 {q3},[r1]

Expand Down
9 changes: 4 additions & 5 deletions generated-src/linux-aarch64/crypto/fipsmodule/aesv8-armx.S
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,6 @@ aes_hw_ctr32_encrypt_blocks:
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo

// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
// affected by silicon errata #1742098 [0] and #1655431 [1],
Expand Down Expand Up @@ -759,11 +758,12 @@ aes_hw_ctr32_encrypt_blocks:

adds x2,x2,#3
b.eq .Lctr32_done

.Lctr32_tail:
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq

.Lctr32_tail:
b.lt .Lctr32_done // if len = 0, go to done
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
Expand Down Expand Up @@ -804,11 +804,10 @@ aes_hw_ctr32_encrypt_blocks:
aese v0.16b,v23.16b
aese v1.16b,v23.16b

cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq .Lctr32_done
cbz x12,.Lctr32_done // if step = 0 (len = 1), go to done
st1 {v3.16b},[x1]

.Lctr32_done:
Expand Down
8 changes: 4 additions & 4 deletions generated-src/linux-arm/crypto/fipsmodule/aesv8-armx.S
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,6 @@ aes_hw_ctr32_encrypt_blocks:
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
movlo r12,#0

@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
@ affected by silicon errata #1742098 [0] and #1655431 [1],
Expand Down Expand Up @@ -729,11 +728,12 @@ aes_hw_ctr32_encrypt_blocks:

adds r2,r2,#3
beq .Lctr32_done

.Lctr32_tail:
cmp r2,#1
mov r12,#16
moveq r12,#0

.Lctr32_tail:
blt .Lctr32_done @ if len = 0, go to done
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
Expand Down Expand Up @@ -774,10 +774,10 @@ aes_hw_ctr32_encrypt_blocks:
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15

cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
cmp r12, #0
beq .Lctr32_done
vst1.8 {q3},[r1]

Expand Down
27 changes: 14 additions & 13 deletions generated-src/linux-x86/crypto/fipsmodule/aesni-x86.S
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,8 @@ aes_hw_ctr32_encrypt_blocks:
andl $-16,%esp
movl %ebp,80(%esp)
cmpl $1,%eax
je .L039ctr32_one_shortcut
jb .L039ctr32_ret
je .L040ctr32_one_shortcut
movdqu (%ebx),%xmm7
movl $202182159,(%esp)
movl $134810123,4(%esp)
Expand Down Expand Up @@ -904,7 +905,7 @@ aes_hw_ctr32_encrypt_blocks:
pshufd $192,%xmm0,%xmm2
pshufd $128,%xmm0,%xmm3
cmpl $6,%eax
jb .L040ctr32_tail
jb .L041ctr32_tail
pxor %xmm6,%xmm7
shll $4,%ecx
movl $16,%ebx
Expand All @@ -913,9 +914,9 @@ aes_hw_ctr32_encrypt_blocks:
subl %ecx,%ebx
leal 32(%edx,%ecx,1),%edx
subl $6,%eax
jmp .L041ctr32_loop6
jmp .L042ctr32_loop6
.align 16
.L041ctr32_loop6:
.L042ctr32_loop6:
pshufd $64,%xmm0,%xmm4
movdqa 32(%esp),%xmm0
pshufd $192,%xmm1,%xmm5
Expand Down Expand Up @@ -969,14 +970,14 @@ aes_hw_ctr32_encrypt_blocks:
leal 96(%edi),%edi
pshufd $128,%xmm0,%xmm3
subl $6,%eax
jnc .L041ctr32_loop6
jnc .L042ctr32_loop6
addl $6,%eax
jz .L042ctr32_ret
jz .L039ctr32_ret
movdqu (%ebp),%xmm7
movl %ebp,%edx
pxor 32(%esp),%xmm7
movl 240(%ebp),%ecx
.L040ctr32_tail:
.L041ctr32_tail:
por %xmm7,%xmm2
cmpl $2,%eax
jb .L043ctr32_one
Expand Down Expand Up @@ -1007,9 +1008,9 @@ aes_hw_ctr32_encrypt_blocks:
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
jmp .L042ctr32_ret
jmp .L039ctr32_ret
.align 16
.L039ctr32_one_shortcut:
.L040ctr32_one_shortcut:
movups (%ebx),%xmm2
movl 240(%edx),%ecx
.L043ctr32_one:
Expand All @@ -1027,7 +1028,7 @@ aes_hw_ctr32_encrypt_blocks:
movups (%esi),%xmm6
xorps %xmm2,%xmm6
movups %xmm6,(%edi)
jmp .L042ctr32_ret
jmp .L039ctr32_ret
.align 16
.L044ctr32_two:
call _aesni_encrypt2
Expand All @@ -1037,7 +1038,7 @@ aes_hw_ctr32_encrypt_blocks:
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
jmp .L042ctr32_ret
jmp .L039ctr32_ret
.align 16
.L045ctr32_three:
call _aesni_encrypt3
Expand All @@ -1050,7 +1051,7 @@ aes_hw_ctr32_encrypt_blocks:
xorps %xmm7,%xmm4
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
jmp .L042ctr32_ret
jmp .L039ctr32_ret
.align 16
.L046ctr32_four:
call _aesni_encrypt4
Expand All @@ -1066,7 +1067,7 @@ aes_hw_ctr32_encrypt_blocks:
xorps %xmm0,%xmm5
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
.L042ctr32_ret:
.L039ctr32_ret:
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,7 @@ _CET_ENDBR
movb $1,BORINGSSL_function_hit(%rip)
#endif
cmpq $1,%rdx
jb .Lctr32_epilogue
jne .Lctr32_bulk


Expand Down
Loading

0 comments on commit b929d74

Please sign in to comment.