Skip to content

Commit

Permalink
AArch64: Fix potential out of bounds access in DotProd H/HV filters
Browse files Browse the repository at this point in the history
The DotProd/I8MM horizontal and HV/2D subpel filters used a -4 offset
for sampling instead of -3 so that loads were better aligned in some
cases. This resulted in an out-of-bounds access, which led to crashes.

This patch fixes it by sampling from offset -3 and shifting the
horizontal shuffle-table indices down by one to match.

(cherry picked from commit 92f592ed104ba92ad35c781ee93f354525eef503)
  • Loading branch information
Arpad Panyik authored and fbossen committed Jul 18, 2024
1 parent a6a2373 commit 1dbb78d
Showing 1 changed file with 7 additions and 9 deletions.
16 changes: 7 additions & 9 deletions src/arm/64/mc_dotprod.S
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,12 @@ L(hv_tbl_neon_dotprod):

// Shuffle indices to permute horizontal samples in preparation for input to
// SDOT instructions. The 8-tap horizontal convolution uses sample indices in the
// interval of [-3, 4] relative to the current sample position. We load samples
// from index value -4 to keep loads word aligned, so the shuffle bytes are
// translated by 1 to handle this.
// interval of [-3, 4] relative to the current sample position.
.align 4
L(h_tbl_neon_dotprod):
.byte 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 4, 5, 6, 7
.byte 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10, 8, 9, 10, 11
.byte 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14, 12, 13, 14, 15
.byte 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6
.byte 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10
.byte 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14

// Vertical convolutions are also using SDOT instructions, where a 128-bit
// register contains a transposed 4x4 matrix of values. Subsequent iterations of
Expand Down Expand Up @@ -683,7 +681,7 @@ L(\type\()_8tap_h_hv_\isa):
mov w13, 0x2002 // FILTER_WEIGHT * 128 + rounding
dup v27.4s, w13 // put H overrides this
.endif
sub \src, \src, #4 // src - 4
sub \src, \src, #3 // src - 3
ubfx w9, \mx, #7, #7
and \mx, \mx, #0x7F
ubfx w11, w14, #7, #7 // for HV
Expand All @@ -699,7 +697,7 @@ L(\type\()_8tap_h_hv_\isa):
// HV cases
cmp \h, #4
csel w14, w14, w11, le
sub \src, \src, \s_strd, lsl #1 // src - s_strd * 2 - 4
sub \src, \src, \s_strd, lsl #1 // src - s_strd * 2 - 3
add \xmy, x12, x14, lsl #3 // subpel V filter address
mov x15, x30
ldr d7, [\xmy]
Expand All @@ -711,7 +709,7 @@ L(\type\()_8tap_h_hv_\isa):
b.ne L(\type\()_6tap_hv_\isa) // vertical != SHARP1

// HV 8-tap cases
sub \src, \src, \s_strd // src - s_strd * 3 - 4
sub \src, \src, \s_strd // src - s_strd * 3 - 3
cmp \w, #4
b.eq 40f
.ifc \type, put
Expand Down

0 comments on commit 1dbb78d

Please sign in to comment.