From 513891bfd54c01cb9dcc0400384f226fd329d571 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 23 Sep 2024 02:21:01 -0700 Subject: [PATCH] `fn DisjointMutArcSlice::new_zeroed_slice`: Add constructor that uses zeroed allocations to optimize out initialization, and use this on `f.mvs`. --- .github/workflows/build-and-benchmark-x86.yml | 2 +- .../build-and-test-aarch64-android.yml | 4 +-- .../build-and-test-aarch64-darwin.yml | 2 +- .../workflows/build-and-test-x86-extra.yml | 4 +-- .github/workflows/build-and-test-x86.yml | 6 ++-- lib.rs | 1 + src/decode.rs | 9 +++-- src/disjoint_mut.rs | 33 +++++++++++++++++++ src/refmvs.rs | 2 +- 9 files changed, 48 insertions(+), 15 deletions(-) diff --git a/.github/workflows/build-and-benchmark-x86.yml b/.github/workflows/build-and-benchmark-x86.yml index 865e05894..34dda02b5 100644 --- a/.github/workflows/build-and-benchmark-x86.yml +++ b/.github/workflows/build-and-benchmark-x86.yml @@ -15,7 +15,7 @@ jobs: - name: release build main branch run: | git fetch --depth 1 origin main && git checkout origin/main - nice cargo +stable build --release --target-dir target.main + nice cargo build --release --target-dir target.main - name: benchmark on chimera 8-bit test data run: | mkdir -p `dirname $LOCAL_FILE` diff --git a/.github/workflows/build-and-test-aarch64-android.yml b/.github/workflows/build-and-test-aarch64-android.yml index feccc1942..ccfb3dd4b 100644 --- a/.github/workflows/build-and-test-aarch64-android.yml +++ b/.github/workflows/build-and-test-aarch64-android.yml @@ -29,8 +29,8 @@ jobs: key: aarch64-android-cargo-and-target-${{ hashFiles('**/Cargo.lock') }} - name: cargo build for aarch64-linux-android run: | - rustup target add --toolchain stable aarch64-linux-android - cargo +stable build --target aarch64-linux-android --release + rustup target add aarch64-linux-android + cargo build --target aarch64-linux-android --release env: AR: llvm-ar CC: aarch64-linux-android26-clang diff --git a/.github/workflows/build-and-test-aarch64-darwin.yml b/.github/workflows/build-and-test-aarch64-darwin.yml index b341c5ad3..0f77c98ec 100644 --- a/.github/workflows/build-and-test-aarch64-darwin.yml +++ b/.github/workflows/build-and-test-aarch64-darwin.yml @@ -34,7 +34,7 @@ jobs: target/ key: arm-darwin-cargo-and-target-${{ hashFiles('**/Cargo.lock') }} - name: cargo ${{ matrix.build.name }} build for aarch64-apple-darwin - run: cargo +stable build ${{ matrix.build.cargo_flags }} + run: cargo build ${{ matrix.build.cargo_flags }} - name: test ${{ matrix.build.name }} build without frame delay run: | .github/workflows/test.sh \ diff --git a/.github/workflows/build-and-test-x86-extra.yml b/.github/workflows/build-and-test-x86-extra.yml index d7f02ea00..8dcf2be13 100644 --- a/.github/workflows/build-and-test-x86-extra.yml +++ b/.github/workflows/build-and-test-x86-extra.yml @@ -60,8 +60,8 @@ jobs: argon_coveragetool_av1_base_and_extended_profiles_v2.1.1.zip - name: cargo build for ${{ matrix.target }} ${{ matrix.build.name }} run: | - rustup target add --toolchain stable ${{ matrix.target }} - cargo +stable build --target ${{ matrix.target }} ${{ matrix.build.flags }} + rustup target add ${{ matrix.target }} + cargo build --target ${{ matrix.target }} ${{ matrix.build.flags }} env: RUSTFLAGS: "-C overflow-checks=on" - name: download, check, and unpack argon test vectors diff --git a/.github/workflows/build-and-test-x86.yml b/.github/workflows/build-and-test-x86.yml index 329e0a129..69f0bf339 100644 --- a/.github/workflows/build-and-test-x86.yml +++ b/.github/workflows/build-and-test-x86.yml @@ -57,8 +57,8 @@ jobs: - name: cargo build for ${{ matrix.target }} ${{ matrix.build.name }} run: | cargo clean - rustup target add --toolchain stable ${{ matrix.target }} - cargo +stable build --target ${{ matrix.target }} ${{ matrix.build.flags }} + rustup target add ${{ matrix.target }} + cargo build --target ${{ matrix.target }} ${{ matrix.build.flags }} - name: meson test for ${{ matrix.target }} ${{ matrix.build.name }} run: | .github/workflows/test.sh \ @@ -110,7 +110,7 @@ jobs: key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - name: cargo build for x86_64-apple-darwin run: | - cargo +stable build --release + cargo build --release - name: meson test for x86_64-apple-darwin run: | .github/workflows/test.sh \ diff --git a/lib.rs b/lib.rs index 3bc3ec59c..c90270bf4 100644 --- a/lib.rs +++ b/lib.rs @@ -4,6 +4,7 @@ any(target_arch = "riscv32", target_arch = "riscv64"), feature(stdarch_riscv_feature_detection) )] +#![feature(new_uninit)] #![deny(unsafe_op_in_unsafe_fn)] #![allow(clippy::all)] #![deny(clippy::undocumented_unsafe_blocks)] diff --git a/src/decode.rs b/src/decode.rs index 6695e9798..2f29ab6fc 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -29,6 +29,7 @@ use crate::src::cdf::CdfThreadContext; use crate::src::ctx::CaseSet; use crate::src::dequant_tables::dav1d_dq_tbl; use crate::src::disjoint_mut::DisjointMut; +use crate::src::disjoint_mut::DisjointMutArcSlice; use crate::src::disjoint_mut::DisjointMutSlice; use crate::src::enum_map::enum_map; use crate::src::enum_map::enum_map_ty; @@ -5220,11 +5221,9 @@ pub fn rav1d_submit_frame(c: &Rav1dContext, state: &mut Rav1dState) -> Rav1dResu // ref_mvs if frame_hdr.frame_type.is_inter_or_switch() || frame_hdr.allow_intrabc { // TODO fallible allocation - f.mvs = Some( - (0..f.sb128h as usize * 16 * (f.b4_stride >> 1) as usize) - .map(|_| Default::default()) - .collect(), - ); + f.mvs = Some(DisjointMutArcSlice::new_zeroed_slice( + f.sb128h as usize * 16 * (f.b4_stride >> 1) as usize, + )); if !frame_hdr.allow_intrabc { for i in 0..7 { f.refpoc[i] = f.refp[i].p.frame_hdr.as_ref().unwrap().frame_offset as c_uint; diff --git a/src/disjoint_mut.rs b/src/disjoint_mut.rs index ea3e65f50..8a71be36b 100644 --- a/src/disjoint_mut.rs +++ b/src/disjoint_mut.rs @@ -13,6 +13,7 @@ use std::fmt::Formatter; use std::marker::PhantomData; use std::mem; use std::mem::ManuallyDrop; +use std::mem::MaybeUninit; use std::ops::Deref; use std::ops::DerefMut; use std::ops::Index; @@ -27,6 +28,7 @@ use std::ptr::addr_of_mut; use std::sync::Arc; use zerocopy::AsBytes; use zerocopy::FromBytes; +use zerocopy::FromZeroes; /// Wraps an indexable collection to allow unchecked concurrent mutable borrows. /// @@ -1212,3 +1214,34 @@ impl Default for DisjointMutArcSlice { [].into_iter().collect() } } + +impl DisjointMutArcSlice { + pub fn new_zeroed_slice(len: usize) -> Self { + #[cfg(debug_assertions)] + let inner = { + let box_slice = Box::new_zeroed_slice(len); + Arc::new(DisjointMut::new(box_slice)) + }; + #[cfg(not(debug_assertions))] + let inner = { + use std::mem; + + let arc_slice = Arc::<[T]>::new_zeroed_slice(len); + + // Do our best to check that `DisjointMut` is in fact `#[repr(transparent)]`. + type A = Vec; // Some concrete sized type. + const _: () = assert!(mem::size_of::>() == mem::size_of::()); + const _: () = assert!(mem::align_of::>() == mem::align_of::()); + + // SAFETY: When `#[cfg(not(debug_assertions))]`, `DisjointMut` is `#[repr(transparent)]`, + // containing only an `UnsafeCell`, which is also `#[repr(transparent)]`. + unsafe { mem::transmute::, Arc>>(arc_slice) } + }; + // SAFETY: `T: FromZeroes`, and the `MaybeUninit` is all zeros, + // since it is allocated with `new_zeroed_slice`. + let inner = unsafe { + mem::transmute::>>, Arc>>(inner) + }; + Self { inner } + } +} diff --git a/src/refmvs.rs b/src/refmvs.rs index fc2c2bc36..4ffec2c84 100644 --- a/src/refmvs.rs +++ b/src/refmvs.rs @@ -30,7 +30,7 @@ use std::ptr; use std::slice; use zerocopy::FromZeroes; -#[derive(Clone, Copy, Default, PartialEq, Eq)] +#[derive(Clone, Copy, Default, PartialEq, Eq, FromZeroes)] #[repr(C, packed)] pub struct RefMvsTemporalBlock { pub mv: Mv,