diff --git a/Cargo.toml b/Cargo.toml
index bcddd7a..2f513ae 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "davis-edi-rs"
-version = "0.2.5"
+version = "0.2.6"
 edition = "2021"
 authors = ["Andrew C. Freeman"]
 description = "A fast, Rust-based, open-source implementation of the paper \"Bringing a Blurry Frame Alive at High Frame-Rate with an Event Camera\" (2019) by Pan et al."
diff --git a/src/util/mod.rs b/src/util/mod.rs
index 3beeb6b..07ab667 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -1,5 +1,10 @@
+use std::arch::x86_64::{_mm_loadu_pd, _mm_storeu_pd};
 use nalgebra::{Dyn, OMatrix};
 use opencv::core::{Mat, MatExprTraitConst, MatTrait, CV_64F};
+use std::arch::x86_64::*;
+use std::arch::x86_64::{_mm256_loadu_pd, _mm256_storeu_pd};
+
+
 
 pub(crate) mod event_adder;
 pub mod reconstructor;
@@ -10,9 +15,20 @@ fn omatrix_to_mat(omatrix: &OMatrix<f64, Dyn, Dyn>) -> Mat {
     let cols = omatrix.ncols() as i32;
 
     let mut mat = Mat::zeros(rows, cols, CV_64F).unwrap().to_mat().unwrap();
-    for i in 0..rows {
-        for j in 0..cols {
-            *mat.at_2d_mut::<f64>(i, j).unwrap() = omatrix[(i as usize, j as usize)];
+    {
+        // nalgebra stores elements column-major, so the transposed copy's backing slice lists
+        // the original matrix row by row, which is the order the bulk copy below writes into
+        // `mat`. Bind it to a local: a pointer taken from the `omatrix.transpose()` temporary
+        // would dangle as soon as that statement ended.
+        let transposed = omatrix.transpose();
+        let src = transposed.as_slice();
+
+        // SAFETY: `src` is a live slice of `src.len()` f64 values that cannot overlap `mat`.
+        // Assumes `mat` (freshly allocated as rows x cols CV_64F above) stores its elements
+        // contiguously and f64-aligned, so row 0's pointer addresses `src.len()` f64 slots.
+        unsafe {
+            let dst = mat.ptr_mut(0).unwrap() as *mut f64;
+            std::ptr::copy_nonoverlapping(src.as_ptr(), dst, src.len());
         }