diff --git a/lib.rs b/lib.rs index b024d6271..567d1434f 100644 --- a/lib.rs +++ b/lib.rs @@ -4,6 +4,7 @@ any(target_arch = "riscv32", target_arch = "riscv64"), feature(stdarch_riscv_feature_detection) )] +#![deny(unsafe_op_in_unsafe_fn)] #![allow(clippy::all)] #[cfg(not(any(feature = "bitdepth_8", feature = "bitdepth_16")))] diff --git a/src/cdef.rs b/src/cdef.rs index d4847fd38..a768431af 100644 --- a/src/cdef.rs +++ b/src/cdef.rs @@ -1,3 +1,5 @@ +#![deny(unsafe_op_in_unsafe_fn)] + use crate::include::common::bitdepth::AsPrimitive; use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::DynPixel; @@ -505,83 +507,158 @@ fn cdef_find_dir_rust( best_dir as c_int } +#[deny(unsafe_op_in_unsafe_fn)] #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -wrap_fn_ptr!(unsafe extern "C" fn padding( - tmp: *mut u16, - src: *const DynPixel, - src_stride: ptrdiff_t, - left: *const [LeftPixelRow2px; 8], - top: *const DynPixel, - bottom: *const DynPixel, - h: c_int, - edges: CdefEdgeFlags, -) -> ()); +mod neon { + use super::*; + + wrap_fn_ptr!(unsafe extern "C" fn padding( + tmp: *mut u16, + src: *const DynPixel, + src_stride: ptrdiff_t, + left: *const [LeftPixelRow2px; 8], + top: *const DynPixel, + bottom: *const DynPixel, + h: c_int, + edges: CdefEdgeFlags, + ) -> ()); + + impl padding::Fn { + fn call( + &self, + tmp: &mut [u16], + src: *const BD::Pixel, + src_stride: ptrdiff_t, + left: *const [LeftPixelRow2px; 8], + top: *const BD::Pixel, + bottom: *const BD::Pixel, + h: usize, + edges: CdefEdgeFlags, + ) { + let tmp = tmp.as_mut_ptr(); + let src = src.cast(); + let left = left.cast(); + let top = top.cast(); + let bottom = bottom.cast(); + let h = h as c_int; + // SAFETY: asm should be safe. + unsafe { self.get()(tmp, src, src_stride, left, top, bottom, h, edges) } + } -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -wrap_fn_ptr!(unsafe extern "C" fn filter( - dst: *mut DynPixel, - dst_stride: ptrdiff_t, - tmp: *const u16, - pri_strength: c_int, - sec_strength: c_int, - dir: c_int, - damping: c_int, - h: c_int, - edges: usize, - bitdepth_max: c_int, -) -> ()); + const fn neon() -> Self { + match W { + 4 => bd_fn!(padding::decl_fn, BD, cdef_padding4, neon), + 8 => bd_fn!(padding::decl_fn, BD, cdef_padding8, neon), + _ => unreachable!(), + } + } + } -#[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] -unsafe extern "C" fn cdef_filter_neon_erased< - BD: BitDepth, - const W: usize, - const H: usize, - const TMP_STRIDE: usize, - const TMP_LEN: usize, ->( - dst: *mut DynPixel, - stride: ptrdiff_t, - left: *const [LeftPixelRow2px; 8], - top: *const DynPixel, - bottom: *const DynPixel, - pri_strength: c_int, - sec_strength: c_int, - dir: c_int, - damping: c_int, - edges: CdefEdgeFlags, - bitdepth_max: c_int, - _dst: *const FFISafe, - _top: *const FFISafe, - _bottom: *const FFISafe, -) { - use crate::src::align::Align16; - - let mut tmp_buf = Align16([0; TMP_LEN]); - let tmp = tmp_buf.0[2 * TMP_STRIDE + 8..].as_mut_ptr(); - let (padding, filter) = match W { - 4 => ( - bd_fn!(padding::decl_fn, BD, cdef_padding4, neon), - bd_fn!(filter::decl_fn, BD, cdef_filter4, neon), - ), - 8 => ( - bd_fn!(padding::decl_fn, BD, cdef_padding8, neon), - bd_fn!(filter::decl_fn, BD, cdef_filter8, neon), - ), - _ => unreachable!(), - }; - padding.get()(tmp, dst, stride, left, top, bottom, H as c_int, edges); - filter.get()( - dst, - stride, - tmp, - pri_strength, - sec_strength, - dir, - damping, - H as c_int, - edges.bits() as usize, - bitdepth_max, - ); + wrap_fn_ptr!(unsafe extern "C" fn filter( + dst: *mut DynPixel, + dst_stride: ptrdiff_t, + tmp: *const u16, + pri_strength: c_int, + sec_strength: c_int, + dir: c_int, + damping: c_int, + h: c_int, + edges: usize, + bitdepth_max: c_int, + ) -> ()); + + impl filter::Fn { + fn call( + &self, + dst: *mut BD::Pixel, + dst_stride: ptrdiff_t, + tmp: &[u16], + pri_strength: c_int, + sec_strength: c_int, + dir: c_int, + damping: c_int, + h: usize, + edges: CdefEdgeFlags, + bd: BD, + ) { + let dst = dst.cast(); + let tmp = tmp.as_ptr(); + let h = h as c_int; + let edges = edges.bits() as usize; + let bd = bd.into_c(); + // SAFETY: asm should be safe. + unsafe { + self.get()( + dst, + dst_stride, + tmp, + pri_strength, + sec_strength, + dir, + damping, + h, + edges, + bd, + ) + } + } + + const fn neon() -> Self { + match W { + 4 => bd_fn!(filter::decl_fn, BD, cdef_filter4, neon), + 8 => bd_fn!(filter::decl_fn, BD, cdef_filter8, neon), + _ => unreachable!(), + } + } + } + + #[deny(unsafe_op_in_unsafe_fn)] + pub unsafe extern "C" fn cdef_filter_neon_erased< + BD: BitDepth, + const W: usize, + const H: usize, + const TMP_STRIDE: usize, + const TMP_LEN: usize, + >( + dst: *mut DynPixel, + stride: ptrdiff_t, + left: *const [LeftPixelRow2px; 8], + top: *const DynPixel, + bottom: *const DynPixel, + pri_strength: c_int, + sec_strength: c_int, + dir: c_int, + damping: c_int, + edges: CdefEdgeFlags, + bitdepth_max: c_int, + _dst: *const FFISafe, + _top: *const FFISafe, + _bottom: *const FFISafe, + ) { + use crate::src::align::Align16; + + let dst = dst.cast(); + let left = left.cast(); + let top = top.cast(); + let bottom = bottom.cast(); + let bd = BD::from_c(bitdepth_max); + + let mut tmp_buf = Align16([0; TMP_LEN]); + let tmp = &mut tmp_buf.0[2 * TMP_STRIDE + 8..]; + padding::Fn::neon::().call::(tmp, dst, stride, left, top, bottom, H, edges); + filter::Fn::neon::().call( + dst, + stride, + tmp, + pri_strength, + sec_strength, + dir, + damping, + H, + edges, + bd, + ); + } } impl Rav1dCdefDSPContext { @@ -655,6 +732,8 @@ impl Rav1dCdefDSPContext { #[cfg(all(feature = "asm", any(target_arch = "arm", target_arch = "aarch64")))] #[inline(always)] const fn init_arm(mut self, flags: CpuFlags) -> Self { + use self::neon::cdef_filter_neon_erased; + if !flags.contains(CpuFlags::NEON) { return self; }