From ac347f2c0e11f11fbed9f61a2f4f1f4535c3a562 Mon Sep 17 00:00:00 2001 From: Soumya Date: Sat, 28 Sep 2024 21:21:24 +0200 Subject: [PATCH 01/17] Added rayon to Cargo.toml - Will be used for parallelization --- Cargo.toml | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 89ef53a9..18f5455b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,14 @@ categories = ["science"] readme = "README.md" documentation = "https://axect.github.io/Peroxide_Doc" keywords = ["Numeric", "Science", "Dataframe", "Plot", "LinearAlgebra"] -exclude = ["example_data/", "src/bin/", "benches/", "example/", "test_data/", "peroxide-ad2"] +exclude = [ + "example_data/", + "src/bin/", + "benches/", + "example/", + "test_data/", + "peroxide-ad2", +] [badges] travis-ci = { repository = "axect/peroxide" } @@ -32,17 +39,27 @@ anyhow = "1.0" paste = "1.0" #num-complex = "0.3" netcdf = { version = "0.7", optional = true, default-features = false } -pyo3 = { version = "0.22", optional = true, features = ["auto-initialize", "gil-refs"] } +pyo3 = { version = "0.22", optional = true, features = [ + "auto-initialize", + "gil-refs", +] } blas = { version = "0.22", optional = true } lapack = { version = "0.19", optional = true } serde = { version = "1.0", features = ["derive"], optional = true } json = { version = "0.12", optional = true } -arrow2 = { version = "0.18", features = ["io_parquet", "io_parquet_compression"], optional = true } +arrow2 = { version = "0.18", features = [ + "io_parquet", + "io_parquet_compression", +], optional = true } num-complex = { version = "0.4", optional = true } -lambert_w = { version = "0.3.0", default-features = false, features = ["24bits", "50bits"] } +lambert_w = { version = "0.3.0", default-features = false, features = [ + "24bits", + "50bits", +] } +rayon = "1.10" [package.metadata.docs.rs] -rustdoc-args = [ "--html-in-header", "katex-header.html", "--cfg", "docsrs"] +rustdoc-args = ["--html-in-header", "katex-header.html", "--cfg", "docsrs"] [features] default = [] From 36fb8bfe3f5b2c9295a1186d4c517ce7cdfbbdef Mon Sep 17 00:00:00 2001 From: Soumya Date: Sun, 29 Sep 2024 00:21:45 +0200 Subject: [PATCH 02/17] Use rayon for FPVector - Parallelized impl FPVector for Vec using Rayon. - Added Sync+Send traits are needed --- src/lib.rs | 2 + src/numerical/integral.rs | 48 +++++++------- src/prelude/simpler.rs | 24 +++---- src/statistics/dist.rs | 131 ++++++++++++++++++++++---------------- src/statistics/rand.rs | 23 +++++-- src/structure/vector.rs | 67 ++++++++++++++----- src/traits/fp.rs | 10 +-- src/traits/sugar.rs | 22 ++++--- 8 files changed, 199 insertions(+), 128 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index e1929555..916878b5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -208,3 +208,5 @@ pub mod util; #[cfg(feature = "complex")] pub mod complex; + +extern crate rayon; diff --git a/src/numerical/integral.rs b/src/numerical/integral.rs index ac2165dd..a04d745f 100644 --- a/src/numerical/integral.rs +++ b/src/numerical/integral.rs @@ -161,19 +161,19 @@ impl Integral { /// * `G30K61R` pub fn integrate(f: F, (a, b): (f64, f64), method: Integral) -> f64 where - F: Fn(f64) -> f64 + Copy, + F: Fn(f64) -> f64 + Copy + Send + Sync, { match method { Integral::GaussLegendre(n) => gauss_legendre_quadrature(f, n, (a, b)), Integral::NewtonCotes(n) => newton_cotes_quadrature(f, n, (a, b)), - method => gauss_kronrod_quadrature(f, (a,b), method), + method => gauss_kronrod_quadrature(f, (a, b), method), } } /// Newton Cotes Quadrature pub fn newton_cotes_quadrature(f: F, n: usize, (a, b): (f64, f64)) -> f64 where - F: Fn(f64) -> f64, + F: Fn(f64) -> f64 + Send + Sync, { let h = (b - a) / (n as f64); let node_x = seq(a, b, h); @@ -217,9 +217,9 @@ where #[allow(non_snake_case)] pub fn gauss_kronrod_quadrature(f: F, (a, b): (T, S), method: Integral) -> f64 where - F: Fn(f64) -> f64 + Copy, - T: Into, - S: Into, + F: Fn(f64) -> f64 + Copy, + T: Into, + S: Into, { let (g, k) = method.get_gauss_kronrod_order(); let tol = method.get_tol(); @@ -234,13 +234,9 @@ where let G = gauss_legendre_quadrature(f, g as usize, (a, b)); let K = kronrod_quadrature(f, k as usize, (a, b)); let c = (a + b) / 2f64; - let tol_curr = if method.is_relative() { - tol * G - } else { - tol - }; + let tol_curr = if method.is_relative() { tol * G } else { tol }; if (G - K).abs() < tol_curr || a == b || max_iter == 0 { - if ! G.is_finite() { + if !G.is_finite() { return G; } I += G; @@ -255,11 +251,11 @@ where I } -pub fn kronrod_quadrature(f: F, n: usize, (a, b): (f64, f64)) -> f64 +pub fn kronrod_quadrature(f: F, n: usize, (a, b): (f64, f64)) -> f64 where F: Fn(f64) -> f64, { - (b - a) / 2f64 * unit_kronrod_quadrature(|x| f(x * (b-a) / 2f64 + (a + b) / 2f64), n) + (b - a) / 2f64 * unit_kronrod_quadrature(|x| f(x * (b - a) / 2f64 + (a + b) / 2f64), n) } // ============================================================================= @@ -382,9 +378,9 @@ fn unit_kronrod_quadrature(f: F, n: usize) -> f64 where F: Fn(f64) -> f64, { - let (a,x) = kronrod_table(n); + let (a, x) = kronrod_table(n); let mut s = 0f64; - for i in 0 .. a.len() { + for i in 0..a.len() { s += a[i] * f(x[i]); } s @@ -414,28 +410,28 @@ fn kronrod_table(n: usize) -> (Vec, Vec) { match n % 2 { 0 => { - for i in 0 .. ref_node.len() { + for i in 0..ref_node.len() { result_node[i] = ref_node[i]; result_weight[i] = ref_weight[i]; } - for i in ref_node.len() .. n { - result_node[i] = -ref_node[n-i-1]; - result_weight[i] = ref_weight[n-i-1]; + for i in ref_node.len()..n { + result_node[i] = -ref_node[n - i - 1]; + result_weight[i] = ref_weight[n - i - 1]; } } 1 => { - for i in 0 .. ref_node.len() { + for i in 0..ref_node.len() { result_node[i] = ref_node[i]; result_weight[i] = ref_weight[i]; } - for i in ref_node.len() .. n { - result_node[i] = -ref_node[n-i]; - result_weight[i] = ref_weight[n-i]; + for i in ref_node.len()..n { + result_node[i] = -ref_node[n - i]; + result_weight[i] = ref_weight[n - i]; } } - _ => unreachable!() + _ => unreachable!(), } (result_weight, result_node) } @@ -922,7 +918,7 @@ const LEGENDRE_WEIGHT_25: [f64; 13] = [ 0.054904695975835192, 0.040939156701306313, 0.026354986615032137, - 0.011393798501026288, + 0.011393798501026288, ]; const LEGENDRE_WEIGHT_26: [f64; 13] = [ 0.118321415279262277, diff --git a/src/prelude/simpler.rs b/src/prelude/simpler.rs index 819bc38f..5b786b6a 100644 --- a/src/prelude/simpler.rs +++ b/src/prelude/simpler.rs @@ -1,5 +1,3 @@ -#[cfg(feature="parquet")] -use std::error::Error; use crate::numerical::{ eigen, eigen::{Eigen, EigenMethod::Jacobi}, @@ -8,13 +6,15 @@ use crate::numerical::{ spline, spline::{CubicHermiteSpline, SlopeMethod::Quadratic}, }; +#[cfg(feature = "parquet")] +use crate::structure::dataframe::{DataFrame, WithParquet}; use crate::structure::matrix::{self, Matrix}; use crate::structure::polynomial; use crate::traits::math::{Norm, Normed}; -#[cfg(feature="parquet")] -use crate::structure::dataframe::{DataFrame, WithParquet}; -#[cfg(feature="parquet")] +#[cfg(feature = "parquet")] use arrow2::io::parquet::write::CompressionOptions; +#[cfg(feature = "parquet")] +use std::error::Error; /// Simple Norm pub trait SimpleNorm: Normed { @@ -23,7 +23,7 @@ pub trait SimpleNorm: Normed { } /// Simple integrate -pub fn integrate f64 + Copy>(f: F, (a, b): (f64, f64)) -> f64 { +pub fn integrate f64 + Copy + Send + Sync>(f: F, (a, b): (f64, f64)) -> f64 { integral::integrate(f, (a, b), G7K15R(1e-4, 20)) } @@ -35,7 +35,7 @@ pub trait SimplerLinearAlgebra { fn waz_diag(&self) -> Option; fn waz(&self) -> Option; fn qr(&self) -> matrix::QR; - #[cfg(feature="O3")] + #[cfg(feature = "O3")] fn cholesky(&self) -> Matrix; fn rref(&self) -> Matrix; fn det(&self) -> f64; @@ -99,7 +99,7 @@ impl SimplerLinearAlgebra for Matrix { matrix::LinearAlgebra::qr(self) } - #[cfg(feature="O3")] + #[cfg(feature = "O3")] fn cholesky(&self) -> Matrix { matrix::LinearAlgebra::cholesky(self, matrix::UPLO::Lower) } @@ -161,7 +161,7 @@ use crate::special::function::{ /// Returns [`NAN`](f64::NAN) if the given input is smaller than -1/e (≈ -0.36787944117144233). /// /// Accurate to 50 bits. -/// +/// /// Wrapper of `lambert_w_0` function of `lambert_w` crate. /// /// # Reference @@ -176,7 +176,7 @@ pub fn lambert_w0(z: f64) -> f64 { /// Returns [`NAN`](f64::NAN) if the given input is positive or smaller than -1/e (≈ -0.36787944117144233). /// /// Accurate to 50 bits. -/// +/// /// Wrapper of `lambert_w_m1` function of `lambert_w` crate. /// /// # Reference @@ -187,13 +187,13 @@ pub fn lambert_wm1(z: f64) -> f64 { } /// Simple handle parquet -#[cfg(feature="parquet")] +#[cfg(feature = "parquet")] pub trait SimpleParquet: Sized { fn write_parquet(&self, path: &str) -> Result<(), Box>; fn read_parquet(path: &str) -> Result>; } -#[cfg(feature="parquet")] +#[cfg(feature = "parquet")] impl SimpleParquet for DataFrame { fn write_parquet(&self, path: &str) -> Result<(), Box> { WithParquet::write_parquet(self, path, CompressionOptions::Uncompressed) diff --git a/src/statistics/dist.rs b/src/statistics/dist.rs index b409c197..d3f1f6cb 100644 --- a/src/statistics/dist.rs +++ b/src/statistics/dist.rs @@ -234,15 +234,15 @@ use self::rand::distributions::uniform::SampleUniform; use self::rand::prelude::*; pub use self::OPDist::*; pub use self::TPDist::*; -use crate::traits::fp::FPVector; use crate::special::function::*; +use crate::traits::fp::FPVector; //use statistics::rand::ziggurat; +use self::WeightedUniformError::*; use crate::statistics::{ops::C, stat::Statistics}; use crate::util::non_macro::{linspace, seq}; use crate::util::useful::{auto_zip, find_interval}; +use anyhow::{bail, Result}; use std::f64::consts::E; -use self::WeightedUniformError::*; -use anyhow::{Result, bail}; /// One parameter distribution /// @@ -271,7 +271,7 @@ pub enum TPDist> { pub struct WeightedUniform> { weights: Vec, sum: T, - intervals: Vec<(T, T)> + intervals: Vec<(T, T)>, } #[derive(Debug, Clone, Copy)] @@ -286,7 +286,9 @@ impl std::fmt::Display for WeightedUniformError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { WeightedUniformError::AllZeroWeightError => write!(f, "all weights are zero"), - WeightedUniformError::LengthMismatchError => write!(f, "weights and intervals have different length"), + WeightedUniformError::LengthMismatchError => { + write!(f, "weights and intervals have different length") + } WeightedUniformError::NoNonZeroIntervalError => write!(f, "no non-zero interval found"), WeightedUniformError::EmptyWeightError => write!(f, "weights are empty"), } @@ -295,11 +297,11 @@ impl std::fmt::Display for WeightedUniformError { impl WeightedUniform { /// Create a new weighted uniform distribution - /// + /// /// # Examples /// ``` /// use peroxide::fuga::*; - /// + /// /// fn main() -> Result<(), Box> { /// let weights = vec![1f64, 3f64, 0f64, 2f64]; /// let intervals = vec![0f64, 1f64, 2f64, 4f64, 5f64]; @@ -336,32 +338,31 @@ impl WeightedUniform { } } - let sum = weights.iter() + let sum = weights + .iter() .zip(intervals.iter()) .fold(0f64, |acc, (w, (a, b))| acc + w * (b - a)); - Ok( - WeightedUniform { - weights, - sum, - intervals - } - ) + Ok(WeightedUniform { + weights, + sum, + intervals, + }) } /// Create WeightedUniform from max pooling - /// + /// /// # Examples /// ``` /// use peroxide::fuga::*; - /// + /// /// fn main() -> Result<(), Box> { /// let w = WeightedUniform::from_max_pool_1d(f, (-2f64, 3f64), 10, 1e-3)?; /// w.weights().print(); /// /// Ok(()) /// } - /// + /// /// fn f(x: f64) -> f64 { /// if x.abs() < 1f64 { /// 1f64 - x.abs() @@ -371,40 +372,39 @@ impl WeightedUniform { /// } /// ``` pub fn from_max_pool_1d(f: F, (a, b): (f64, f64), n: usize, eps: f64) -> Result - where F: Fn(f64) -> f64 + Copy { + where + F: Fn(f64) -> f64 + Copy + Send + Sync, + { // Find non-zero intervals let mut a = a; let mut b = b; let trial = seq(a, b, eps); - for i in 0 .. trial.len() { + for i in 0..trial.len() { let x = trial[i]; if f(x) > 0f64 { - a = if i > 0 { trial[i-1] } else { x }; + a = if i > 0 { trial[i - 1] } else { x }; break; } } - for i in (0 .. trial.len()).rev() { + for i in (0..trial.len()).rev() { let x = trial[i]; if f(x) > 0f64 { - b = if i < trial.len() - 1 { trial[i+1] } else { x }; + b = if i < trial.len() - 1 { trial[i + 1] } else { x }; break; } } if a >= b { bail!(NoNonZeroIntervalError); } - let domain = linspace(a, b, n+1); + let domain = linspace(a, b, n + 1); // Find intervals let intervals = auto_zip(&domain); // Find weights - let weights: Vec = intervals.iter() - .map( - |(a, b)| { - seq(*a, *b+eps, eps).reduce(0f64, |acc, x| acc.max(f(x))) - } - ) + let weights: Vec = intervals + .iter() + .map(|(a, b)| seq(*a, *b + eps, eps).reduce(0f64, |acc, x| acc.max(f(x)))) .collect(); Self::new(weights, domain) @@ -419,11 +419,19 @@ impl WeightedUniform { } pub fn domain_linspace(&self, n: usize) -> Vec { - linspace(self.intervals[0].0, self.intervals[self.intervals.len()-1].1, n) + linspace( + self.intervals[0].0, + self.intervals[self.intervals.len() - 1].1, + n, + ) } pub fn domain_seq(&self, step: f64) -> Vec { - seq(self.intervals[0].0, self.intervals[self.intervals.len()-1].1, step) + seq( + self.intervals[0].0, + self.intervals[self.intervals.len() - 1].1, + step, + ) } pub fn sum(&self) -> f64 { @@ -433,15 +441,19 @@ impl WeightedUniform { pub fn update_weights(&mut self, weights: Vec) { assert_eq!(self.intervals.len(), weights.len()); self.weights = weights; - self.sum = self.weights.iter() + self.sum = self + .weights + .iter() .zip(self.intervals.iter()) .fold(0f64, |acc, (w, (a, b))| acc + w * (b - a)); } pub fn update_intervals(&mut self, intervals: Vec) { - assert_eq!(self.weights.len()+1, intervals.len()); + assert_eq!(self.weights.len() + 1, intervals.len()); self.intervals = auto_zip(&intervals); - self.sum = self.weights.iter() + self.sum = self + .weights + .iter() .zip(self.intervals.iter()) .fold(0f64, |acc, (w, (a, b))| acc + w * (b - a)); } @@ -493,7 +505,11 @@ impl> ParametricDist for Weight fn params(&self) -> Self::Parameter { let weights = self.weights.iter().map(|x| (*x).into()).collect(); - let intervals = self.intervals.iter().map(|(x, y)| ((*x).into(), (*y).into())).collect(); + let intervals = self + .intervals + .iter() + .map(|(x, y)| ((*x).into(), (*y).into())) + .collect(); (weights, intervals) } } @@ -619,11 +635,7 @@ impl> RNG for TPDist { Binomial(num, mu) => { let binom = rand_distr::Binomial::new(*num as u64, (*mu).into()).unwrap(); - binom - .sample_iter(rng) - .take(n) - .map(|t| t as f64) - .collect() + binom.sample_iter(rng).take(n).map(|t| t as f64).collect() } Normal(m, s) => { @@ -789,10 +801,12 @@ impl RNG for WeightedUniform { let ics: Vec = w.sample_iter(&mut rng_clip).take(n).collect(); *rng = rng_clip; - ics.into_iter().map(|idx| { - let (l, r) = self.intervals[idx]; - rng.gen_range(l ..=r) - }).collect::>() + ics.into_iter() + .map(|idx| { + let (l, r) = self.intervals[idx]; + rng.gen_range(l..=r) + }) + .collect::>() } fn pdf>(&self, x: S) -> f64 { @@ -812,11 +826,11 @@ impl RNG for WeightedUniform { return 1f64; } let idx = find_interval(self.intervals(), x); - self.weights[0 ..=idx].iter() - .zip(self.intervals[0 ..=idx].iter()) - .fold(0f64, |acc, (w, (a, b))| { - acc + w * (b - a) - }) / self.sum + self.weights[0..=idx] + .iter() + .zip(self.intervals[0..=idx].iter()) + .fold(0f64, |acc, (w, (a, b))| acc + w * (b - a)) + / self.sum } } @@ -912,16 +926,23 @@ impl Statistics for WeightedUniform { type Value = f64; fn mean(&self) -> Self::Value { - self.intervals().iter().zip(self.weights().iter()) - .map(|((l, r), w)| (r.powi(2)-l.powi(2))/2f64 * w) - .sum::() / self.sum + self.intervals() + .iter() + .zip(self.weights().iter()) + .map(|((l, r), w)| (r.powi(2) - l.powi(2)) / 2f64 * w) + .sum::() + / self.sum } fn var(&self) -> Self::Value { let mean = self.mean(); - self.intervals().iter().zip(self.weights().iter()) + self.intervals() + .iter() + .zip(self.weights().iter()) .map(|((l, r), w)| w * (r.powi(3) - l.powi(3)) / 3f64) - .sum::() / self.sum - mean * mean + .sum::() + / self.sum + - mean * mean } fn sd(&self) -> Self::Value { diff --git a/src/statistics/rand.rs b/src/statistics/rand.rs index 36590085..7b36bc1d 100644 --- a/src/statistics/rand.rs +++ b/src/statistics/rand.rs @@ -24,9 +24,9 @@ extern crate rand; use self::rand::distributions::uniform::SampleUniform; use self::rand::prelude::*; +use crate::statistics::dist::{WeightedUniform, RNG}; #[allow(unused_imports)] use crate::structure::matrix::*; -use crate::statistics::dist::{RNG, WeightedUniform}; /// Small random number generator from seed /// @@ -466,7 +466,9 @@ pub fn ziggurat(rng: &mut ThreadRng, sigma: f64) -> f64 { /// Ok(()) /// } pub fn prs(f: F, n: usize, (a, b): (f64, f64), m: usize, eps: f64) -> anyhow::Result> -where F: Fn(f64) -> f64 + Copy { +where + F: Fn(f64) -> f64 + Copy + Send + Sync, +{ let mut rng = thread_rng(); let mut result = vec![0f64; n]; @@ -482,7 +484,7 @@ where F: Fn(f64) -> f64 + Copy { if weight <= 0f64 { continue; } else { - let y = rng.gen_range(0f64 ..=weight); + let y = rng.gen_range(0f64..=weight); if y <= f(x) { result[n - left_num] = x; @@ -527,8 +529,17 @@ where F: Fn(f64) -> f64 + Copy { /// /// Ok(()) /// } -pub fn prs_with_rng(f: F, n: usize, (a, b): (f64, f64), m: usize, eps: f64, rng: &mut R) -> anyhow::Result> - where F: Fn(f64) -> f64 + Copy { +pub fn prs_with_rng( + f: F, + n: usize, + (a, b): (f64, f64), + m: usize, + eps: f64, + rng: &mut R, +) -> anyhow::Result> +where + F: Fn(f64) -> f64 + Copy + Send + Sync, +{ let mut result = vec![0f64; n]; let w = WeightedUniform::from_max_pool_1d(f, (a, b), m, eps)?; @@ -542,7 +553,7 @@ pub fn prs_with_rng(f: F, n: usize, (a, b): (f64, f64), m: us if weight <= 0f64 { continue; } else { - let y = rng.gen_range(0f64 ..=weight); + let y = rng.gen_range(0f64..=weight); if y <= f(x) { result[n - left_num] = x; diff --git a/src/structure/vector.rs b/src/structure/vector.rs index 0e977361..0d57ae42 100644 --- a/src/structure/vector.rs +++ b/src/structure/vector.rs @@ -267,6 +267,8 @@ extern crate blas; #[cfg(feature = "O3")] use blas::{daxpy, ddot, dnrm2, idamax}; +use rayon::prelude::*; + use crate::structure::matrix::{matrix, Matrix, Row}; use crate::traits::{ fp::FPVector, @@ -295,10 +297,14 @@ impl FPVector for Vec { /// ``` fn fmap(&self, f: F) -> Vec where - F: Fn(f64) -> f64, + F: Fn(f64) -> f64 + Sync + Send, { + // let mut v = self.clone(); + // v.iter_mut().for_each(|x| *x = f(*x)); + // v + let mut v = self.clone(); - v.iter_mut().for_each(|x| *x = f(*x)); + v.par_iter_mut().for_each(|x| *x = f(*x)); v } @@ -317,17 +323,27 @@ impl FPVector for Vec { /// ``` fn reduce(&self, init: T, f: F) -> f64 where - F: Fn(f64, f64) -> f64, - T: Into, + F: Fn(f64, f64) -> f64 + Send + Sync, + T: Into + Send + Sync + Copy, { self.iter().fold(init.into(), |x, &y| f(x, y)) + // self.par_iter() + // .cloned() + // .fold(|| init.into(), |x, y| f(x, y)) + // .sum::() // Combining fold and reduce to produce a single value + // // .reduce(|| init.into(), |x, y| f(x, y)) // can not use reduce instead of .sum(), since the fn f used might not be associative (e.g. check test_max_pool_1d) + // // https://docs.rs/rayon/latest/rayon/iter/trait.ParallelIterator.html#method.reduce } fn zip_with(&self, f: F, other: &Vec) -> Vec where - F: Fn(f64, f64) -> f64, + F: Fn(f64, f64) -> f64 + Send + Sync, { - self.iter() + // self.iter() + // .zip(other) + // .map(|(x, y)| f(*x, *y)) + // .collect::>() + self.par_iter() .zip(other) .map(|(x, y)| f(*x, *y)) .collect::>() @@ -349,10 +365,14 @@ impl FPVector for Vec { /// ``` fn filter(&self, f: F) -> Vec where - F: Fn(f64) -> bool, + F: Fn(f64) -> bool + Send + Sync, { + // self.clone() + // .into_iter() + // .filter(|x| f(*x)) + // .collect::>() self.clone() - .into_iter() + .into_par_iter() .filter(|x| f(*x)) .collect::>() } @@ -372,8 +392,16 @@ impl FPVector for Vec { /// } /// ``` fn take(&self, n: usize) -> Vec { - let mut v = vec![0f64; n]; - v[..n].copy_from_slice(&self[..n]); + // let mut v = vec![0f64; n]; + // v[..n].copy_from_slice(&self[..n]); + // v + let mut v = vec![0_f64; n]; + v[..n] + .par_iter_mut() + .zip(self[..n].par_iter()) + .for_each(|(dest, src)| { + *dest = *src; + }); // parallel equivalent of copy_from_slice() v } @@ -392,20 +420,29 @@ impl FPVector for Vec { /// } /// ``` fn skip(&self, n: usize) -> Vec { + // let l = self.len(); + // let mut v = vec![0f64; l - n]; + // for (i, j) in (n..l).enumerate() { + // v[i] = self[j]; + // } + // v let l = self.len(); let mut v = vec![0f64; l - n]; - for (i, j) in (n..l).enumerate() { - v[i] = self[j]; - } + v[..(l - n)] + .par_iter_mut() + .zip(self[n..l].par_iter()) + .for_each(|(dest, src)| *dest = *src); v } fn sum(&self) -> f64 { - self.iter().sum() + // self.iter().sum() + self.par_iter().sum() } fn prod(&self) -> f64 { - self.iter().product() + // self.iter().product() + self.par_iter().product() } } diff --git a/src/traits/fp.rs b/src/traits/fp.rs index 41f11463..9204df38 100644 --- a/src/traits/fp.rs +++ b/src/traits/fp.rs @@ -6,17 +6,17 @@ pub trait FPVector { fn fmap(&self, f: F) -> Self where - F: Fn(Self::Scalar) -> Self::Scalar; + F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync; fn reduce(&self, init: T, f: F) -> Self::Scalar where - F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar, - T: Into; + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync, + T: Into + Send + Sync + Copy; fn zip_with(&self, f: F, other: &Self) -> Self where - F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar; + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync; fn filter(&self, f: F) -> Self where - F: Fn(Self::Scalar) -> bool; + F: Fn(Self::Scalar) -> bool + Send + Sync; fn take(&self, n: usize) -> Self; fn skip(&self, n: usize) -> Self; fn sum(&self) -> Self::Scalar; diff --git a/src/traits/sugar.rs b/src/traits/sugar.rs index a51d9269..7bb9b989 100644 --- a/src/traits/sugar.rs +++ b/src/traits/sugar.rs @@ -1,15 +1,19 @@ -use crate::structure::matrix::{Matrix, Shape, matrix}; +use crate::structure::matrix::{matrix, Matrix, Shape}; use crate::traits::fp::FPVector; use crate::util::non_macro::zeros_shape; -use std::ops::{Add, Sub, Mul, Div}; +use std::ops::{Add, Div, Mul, Sub}; /// Syntactic sugar for Vector operations -pub trait VecOps: Sized + FPVector -where Self::Scalar: Copy + Clone - + Add - + Sub - + Mul - + Div +pub trait VecOps: Sized + FPVector +where + Self::Scalar: Copy + + Clone + + Add + + Sub + + Mul + + Div + + Send + + Sync, { //type Scalar; fn add_v(&self, v: &Self) -> Self { @@ -449,7 +453,7 @@ impl ConvToMat for Vec { fn to_col(&self) -> Matrix { matrix(self.clone(), self.len(), 1, Shape::Col) } - + fn to_row(&self) -> Matrix { matrix(self.clone(), 1, self.len(), Shape::Row) } From e916c8858c4047d21c5a0553e2a12e93ee8ffeaf Mon Sep 17 00:00:00 2001 From: Soumya Date: Sun, 29 Sep 2024 11:13:20 +0200 Subject: [PATCH 03/17] Added parallelism to other VecOps - Used rayon to parallelize vector operations like innerprod, crossprod --- src/structure/vector.rs | 118 ++++++++++++++++++++++++++-------------- src/traits/mutable.rs | 4 +- 2 files changed, 79 insertions(+), 43 deletions(-) diff --git a/src/structure/vector.rs b/src/structure/vector.rs index 0d57ae42..58c4315e 100644 --- a/src/structure/vector.rs +++ b/src/structure/vector.rs @@ -277,7 +277,7 @@ use crate::traits::{ mutable::MutFP, pointer::{Oxide, Redox, RedoxCommon}, }; -use std::cmp::min; +// use std::cmp::min; impl FPVector for Vec { type Scalar = f64; @@ -449,14 +449,17 @@ impl FPVector for Vec { /// Explicit version of `map` pub fn map(f: F, xs: &[T]) -> Vec where - F: Fn(T) -> T, - T: Default + Copy, + F: Fn(T) -> T + Send + Sync, + T: Default + Copy + Send + Sync, { - let l = xs.len(); - let mut result = vec![T::default(); l]; - for i in 0..l { - result[i] = f(xs[i]); - } + // let l = xs.len(); + // let mut result = vec![T::default(); l]; + // for i in 0..l { + // result[i] = f(xs[i]); + // } + // result + let mut result = vec![T::default(); xs.len()]; + result.par_iter_mut().for_each(|x| *x = f(*x)); result } @@ -471,20 +474,25 @@ where s = f(s, x); } s + // Parallel version: !unimplemented() } /// Explicit version of `zip_with` pub fn zip_with(f: F, xs: &[T], ys: &[T]) -> Vec where - F: Fn(T, T) -> T, - T: Default + Copy, + F: Fn(T, T) -> T + Send + Sync, + T: Default + Copy + Send + Sync, { - let l = min(xs.len(), ys.len()); - let mut result = vec![T::default(); l]; - for i in 0..l { - result[i] = f(xs[i], ys[i]); - } - result + // let l = min(xs.len(), ys.len()); + // let mut result = vec![T::default(); l]; + // for i in 0..l { + // result[i] = f(xs[i], ys[i]); + // } + // result + xs.par_iter() + .zip(ys.into_par_iter()) + .map(|(x, y)| f(*x, *y)) + .collect::>() } impl MutFP for Vec { @@ -492,20 +500,24 @@ impl MutFP for Vec { fn mut_map(&mut self, f: F) where - F: Fn(Self::Scalar) -> Self::Scalar, + F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync, { - for x in self.iter_mut() { - *x = f(*x); - } + // for x in self.iter_mut() { + // *x = f(*x); + // } + self.par_iter_mut().for_each(|x| *x = f(*x)); } fn mut_zip_with(&mut self, f: F, other: &Self) where - F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar, + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync, { - for i in 0..self.len() { - self[i] = f(self[i], other[i]); - } + // for i in 0..self.len() { + // self[i] = f(self[i], other[i]); + // } + self.par_iter_mut() + .zip(other.into_par_iter()) + .for_each(|(x, y)| *x = f(*x, *y)); } } @@ -705,7 +717,11 @@ impl Normed for Vec { } #[cfg(not(feature = "O3"))] { - self.iter().map(|x| x.powi(2)).sum::().sqrt() + // self.iter().map(|x| x.powi(2)).sum::().sqrt() + self.par_iter() + .map(|x| x.powi(2)) + .sum::() + .sqrt() } } Norm::Lp(p) => { @@ -714,7 +730,11 @@ impl Normed for Vec { "lp norm is only defined for p>=1, the given value was p={}", p ); - self.iter().map(|x| x.powf(p)).sum::().powf(1f64 / p) + // self.iter().map(|x| x.powf(p)).sum::().powf(1f64 / p) + self.par_iter() + .map(|x| x.powf(p)) + .sum::() + .powf(1_f64 / p) } Norm::LInf => self.iter().fold(0f64, |x, y| x.max(y.abs())), Norm::F => unimplemented!(), @@ -743,9 +763,13 @@ impl InnerProduct for Vec { } #[cfg(not(feature = "O3"))] { - self.iter() - .zip(rhs.iter()) - .fold(0f64, |x, (y1, y2)| x + y1 * y2) + // self.iter() + // .zip(rhs.iter()) + // .fold(0f64, |x, (y1, y2)| x + y1 * y2) + self.par_iter() + .zip(rhs.into_par_iter()) + .fold(|| 0_f64, |x, (y1, y2)| x + y1 * y2) + .sum::() } } } @@ -766,18 +790,30 @@ impl VectorProduct for Vec { fn cross(&self, other: &Self) -> Self { assert_eq!(self.len(), other.len()); // 2D cross product is ill-defined - if self.len() == 2 { - let mut v = vec![0f64; 1]; - v[0] = self[0] * other[1] - self[1] * other[0]; - v - } else if self.len() == 3 { - let mut v = vec![0f64; 3]; - v[0] = self[1] * other[2] - self[2] * other[1]; - v[1] = self[2] * other[0] - self[0] * other[2]; - v[2] = self[0] * other[1] - self[1] * other[0]; - v - } else { - panic!("Cross product can be defined only in 2 or 3 dimension") + match self.len() { + 2 => { + let mut v = vec![0f64; 1]; + v[0] = self[0] * other[1] - self[1] * other[0]; + v + } + 3 => { + // let mut v = vec![0f64; 3]; + // v[0] = self[1] * other[2] - self[2] * other[1]; + // v[1] = self[2] * other[0] - self[0] * other[2]; + // v[2] = self[0] * other[1] - self[1] * other[0]; + // v + let v = (0..3) + .into_par_iter() + .map(|index| { + self[(index + 1) % 3] * other[(index + 2) % 3] + - self[(index + 2) % 3] * other[(index + 1) % 3] + }) + .collect::>(); + v + } + _ => { + panic!("Cross product can be defined only in 2 or 3 dimension") + } } } diff --git a/src/traits/mutable.rs b/src/traits/mutable.rs index 7b0932fc..a8ef80a9 100644 --- a/src/traits/mutable.rs +++ b/src/traits/mutable.rs @@ -4,10 +4,10 @@ pub trait MutFP { type Scalar; fn mut_map(&mut self, f: F) where - F: Fn(Self::Scalar) -> Self::Scalar; + F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync; fn mut_zip_with(&mut self, f: F, other: &Self) where - F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar; + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync; } pub trait MutMatrix { From 454f82c2d2dde5031166051a9cc33497a0b36184 Mon Sep 17 00:00:00 2001 From: Soumya Date: Sun, 29 Sep 2024 13:21:23 +0200 Subject: [PATCH 04/17] Intial attempt to parallelize matrix - Used rayon to parallelize impl Matrix --- src/structure/matrix.rs | 368 +++++++++++++++++++++++++--------------- src/structure/vector.rs | 2 +- 2 files changed, 228 insertions(+), 142 deletions(-) diff --git a/src/structure/matrix.rs b/src/structure/matrix.rs index 797792ca..4fa1f00f 100644 --- a/src/structure/matrix.rs +++ b/src/structure/matrix.rs @@ -192,7 +192,7 @@ //! To write columns or rows, `DataFrame` and `nc` feature could be the best choice. //! //! * `nc` feature should be required - `netcdf` or `libnetcdf` are prerequisites. -//! +//! //! ```no_run //! #[macro_use] //! extern crate peroxide; @@ -596,10 +596,10 @@ //! } //! ``` -#[cfg(feature="csv")] +#[cfg(feature = "csv")] extern crate csv; -#[cfg(feature="csv")] +#[cfg(feature = "csv")] use self::csv::{ReaderBuilder, StringRecord, WriterBuilder}; #[cfg(feature = "O3")] @@ -609,30 +609,32 @@ extern crate lapack; #[cfg(feature = "O3")] use blas::{daxpy, dgemm, dgemv}; #[cfg(feature = "O3")] -use lapack::{dgecon, dgeqrf, dgetrf, dgetri, dgetrs, dorgqr, dgesvd, dpotrf}; +use lapack::{dgecon, dgeqrf, dgesvd, dgetrf, dgetri, dgetrs, dorgqr, dpotrf}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use crate::rayon::prelude::*; + pub use self::Shape::{Col, Row}; use crate::numerical::eigen::{eigen, EigenMethod}; +use crate::structure::dataframe::{Series, TypedVector}; +use crate::traits::sugar::ScalableMut; use crate::traits::{ - general::Algorithm, fp::{FPMatrix, FPVector}, + general::Algorithm, math::{InnerProduct, LinearOp, MatrixProduct, Norm, Normed, Vector}, mutable::MutMatrix, }; use crate::util::{ - low_level::{swap_vec_ptr, copy_vec_ptr}, + low_level::{copy_vec_ptr, swap_vec_ptr}, non_macro::{cbind, eye, rbind, zeros}, useful::{nearly_eq, tab}, }; -use crate::structure::dataframe::{Series, TypedVector}; +use peroxide_num::{ExpLogOps, Numeric, PowOps, TrigOps}; use std::cmp::{max, min}; pub use std::error::Error; use std::fmt; use std::ops::{Add, Div, Index, IndexMut, Mul, Neg, Sub}; -use crate::traits::sugar::ScalableMut; -use peroxide_num::{ExpLogOps, PowOps, TrigOps, Numeric}; pub type Perms = Vec<(usize, usize)>; @@ -711,10 +713,10 @@ pub struct Matrix { /// ``` pub fn matrix(v: Vec, r: usize, c: usize, shape: Shape) -> Matrix where - T: Into, + T: Into + Send + Sync, { Matrix { - data: v.into_iter().map(|t| t.into()).collect::>(), + data: v.into_par_iter().map(|t| t.into()).collect::>(), row: r, col: c, shape, @@ -724,7 +726,7 @@ where /// R-like matrix constructor (Explicit ver.) pub fn r_matrix(v: Vec, r: usize, c: usize, shape: Shape) -> Matrix where - T: Into, + T: Into + Send + Sync, { matrix(v, r, c, shape) } @@ -745,17 +747,30 @@ where /// ``` pub fn py_matrix(v: Vec>) -> Matrix where - T: Into + Copy, + T: Into + Copy + Send + Sync, { + // let r = v.len(); + // let c = v[0].len(); + // let mut data = vec![0f64; r * c]; + // for i in 0..r { + // for j in 0..c { + // let idx = i * c + j; + // data[idx] = v[i][j].into(); + // } + // } + // matrix(data, r, c, Row) + let r = v.len(); let c = v[0].len(); - let mut data = vec![0f64; r * c]; - for i in 0..r { - for j in 0..c { - let idx = i * c + j; - data[idx] = v[i][j].into(); - } - } + let data = (0..r) + .into_par_iter() + .flat_map(|i| { + (0..c) + .into_par_iter() + .map(|j| v[i][j].into()) + .collect::>() + }) + .collect::>(); matrix(data, r, c, Row) } @@ -774,6 +789,23 @@ where /// } /// ``` pub fn ml_matrix(s: &str) -> Matrix where { + // let str_rows: Vec<&str> = s.split(';').collect(); + // let r = str_rows.len(); + // let str_data = str_rows + // .into_iter() + // .map(|x| x.trim().split(' ').collect::>()) + // .collect::>>(); + // let c = str_data[0].len(); + // let data = str_data + // .into_iter() + // .flat_map(|t| { + // t.into_iter() + // .map(|x| x.parse::().unwrap()) + // .collect::>() + // }) + // .collect::>(); + // matrix(data, r, c, Row) + let str_rows: Vec<&str> = s.split(';').collect(); let r = str_rows.len(); let str_data = str_rows @@ -781,10 +813,11 @@ pub fn ml_matrix(s: &str) -> Matrix where { .map(|x| x.trim().split(' ').collect::>()) .collect::>>(); let c = str_data[0].len(); + let data = str_data - .into_iter() + .into_par_iter() .flat_map(|t| { - t.into_iter() + t.into_par_iter() .map(|x| x.parse::().unwrap()) .collect::>() }) @@ -803,9 +836,15 @@ impl fmt::Display for Matrix { impl PartialEq for Matrix { fn eq(&self, other: &Matrix) -> bool { if self.shape == other.shape { + // self.data + // .clone() + // .into_iter() + // .zip(other.data.clone()) + // .all(|(x, y)| nearly_eq(x, y)) + // && self.row == other.row self.data .clone() - .into_iter() + .into_par_iter() .zip(other.data.clone()) .all(|(x, y)| nearly_eq(x, y)) && self.row == other.row @@ -876,23 +915,35 @@ impl Matrix { let c = self.col; assert_eq!(r * c, self.data.len()); let l = r * c - 1; - let mut data: Vec = self.data.clone(); + // let mut data: Vec = self.data.clone(); let ref_data = &self.data; match self.shape { Row => { - for i in 0..l { - let s = (i * c) % l; - data[i] = ref_data[s]; - } + // for i in 0..l { + // let s = (i * c) % l; + // data[i] = ref_data[s]; + // } + // data[l] = ref_data[l]; + // matrix(data, r, c, Col) + let mut data = (0..=l) + .into_par_iter() + .map(|i| ref_data[(i * c) % l]) + .collect::>(); data[l] = ref_data[l]; matrix(data, r, c, Col) } Col => { - for i in 0..l { - let s = (i * r) % l; - data[i] = ref_data[s]; - } + // for i in 0..l { + // let s = (i * r) % l; + // data[i] = ref_data[s]; + // } + // data[l] = ref_data[l]; + // matrix(data, r, c, Row) + let mut data = (0..=l) + .into_par_iter() + .map(|i| ref_data[(i * r) % l]) + .collect::>(); data[l] = ref_data[l]; matrix(data, r, c, Row) } @@ -907,10 +958,11 @@ impl Matrix { /// ``` /// use peroxide::fuga::*; /// - /// let a = matrix(vec![1,2,3,4],2,2,Row); + /// let mut a = matrix(vec![1,2,3,4,5,6,7,8,9],3,3,Row); /// assert_eq!(a.shape, Row); - /// let b = a.change_shape(); - /// assert_eq!(b.shape, Col); + /// a.change_shape_mut(); + /// assert_eq!(a, matrix(vec![1,4,7,2,5,8,3,6,9],3,3,Col)); + /// assert_eq!(a.shape, Col); /// ``` pub fn change_shape_mut(&mut self) { let r = self.row; @@ -921,18 +973,32 @@ impl Matrix { match self.shape { Row => { - for i in 0..l { - let s = (i * c) % l; - self.data[i] = ref_data[s]; - } + // for i in 0..l { + // let s = (i * c) % l; + // self.data[i] = ref_data[s]; + // } + // self.data[l] = ref_data[l]; + // self.shape = Col; + + self.data = (0..=l) + .into_par_iter() + .map(|i| ref_data[(i * c) % l]) + .collect::>(); self.data[l] = ref_data[l]; self.shape = Col; } Col => { - for i in 0..l { - let s = (i * r) % l; - self.data[i] = ref_data[s]; - } + // for i in 0..l { + // let s = (i * r) % l; + // self.data[i] = ref_data[s]; + // } + // self.data[l] = ref_data[l]; + // self.shape = Row; + + self.data = (0..=l) + .into_par_iter() + .map(|i| ref_data[(i * r) % l]) + .collect::>(); self.data[l] = ref_data[l]; self.shape = Row; } @@ -1041,10 +1107,14 @@ impl Matrix { /// ``` pub fn col(&self, index: usize) -> Vec { assert!(index < self.col); - let mut container: Vec = vec![0f64; self.row]; - for i in 0..self.row { - container[i] = self[(i, index)]; - } + // let mut container: Vec = vec![0f64; self.row]; + // for i in 0..self.row { + // container[i] = self[(i, index)]; + // } + let container = (0..self.row) + .into_par_iter() + .map(|i| self[(i, index)]) + .collect::>(); container } @@ -1063,10 +1133,16 @@ impl Matrix { /// ``` pub fn row(&self, index: usize) -> Vec { assert!(index < self.row); - let mut container: Vec = vec![0f64; self.col]; - for i in 0..self.col { - container[i] = self[(index, i)]; - } + // let mut container: Vec = vec![0f64; self.col]; + // for i in 0..self.col { + // container[i] = self[(index, i)]; + // } + // container + + let container = (0..self.col) + .into_par_iter() + .map(|i| self[(index, i)]) + .collect::>(); container } @@ -1084,14 +1160,17 @@ impl Matrix { /// } /// ``` pub fn diag(&self) -> Vec { - let mut container = vec![0f64; self.row]; - let r = self.row; - let c = self.col; - assert_eq!(r, c); - let c2 = c + 1; - for i in 0..r { - container[i] = self.data[i * c2]; - } + assert_eq!(self.row, self.col); + // let mut container = vec![0f64; self.row]; + // for i in 0..self.row { + // container[i] = self.data[i * (self.col + 1)]; + // } + // container + + let container = (0..self.row) + .into_par_iter() + .map(|i| self.data[i * (self.col + 1)]) + .collect::>(); container } @@ -1102,7 +1181,8 @@ impl Matrix { /// use peroxide::fuga::*; /// /// let a = matrix(vec![1,2,3,4], 2, 2, Row); - /// println!("{}", a); // [[1,3],[2,4]] + /// println!("{}", a.transpose()); // [[1,3],[2,4]] + /// assert_eq!(a.transpose(), matrix(vec![1,3,2,4], 2, 2, Row)) /// ``` pub fn transpose(&self) -> Self { match self.shape { @@ -1143,7 +1223,7 @@ impl Matrix { /// Ok(()) /// } /// ``` - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn write(&self, file_path: &str) -> Result<(), Box> { let mut wtr = WriterBuilder::new().from_path(file_path)?; let r = self.row; @@ -1174,7 +1254,7 @@ impl Matrix { /// Ok(()) /// } /// ``` - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn write_round(&self, file_path: &str, round: usize) -> Result<(), Box> { let mut wtr = WriterBuilder::new().from_path(file_path)?; let r = self.row; @@ -1190,7 +1270,7 @@ impl Matrix { Ok(()) } - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn write_with_header( &self, file_path: &str, @@ -1212,7 +1292,7 @@ impl Matrix { Ok(()) } - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn write_with_header_round( &self, file_path: &str, @@ -1260,7 +1340,7 @@ impl Matrix { /// Ok(()) /// } /// ``` - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn read(file_path: &str, header: bool, delimiter: char) -> Result> { let mut rdr = ReaderBuilder::new() .has_headers(header) @@ -1324,20 +1404,31 @@ impl Matrix { /// From index operations pub fn from_index(f: F, size: (usize, usize)) -> Matrix where - F: Fn(usize, usize) -> G + Copy, + F: Fn(usize, usize) -> G + Copy + Send + Sync, G: Into, { let row = size.0; let col = size.1; - let mut mat = matrix(vec![0f64; row * col], row, col, Row); - - for i in 0..row { - for j in 0..col { - mat[(i, j)] = f(i, j).into(); - } - } - mat + // let mut mat = matrix(vec![0f64; row * col], row, col, Row); + + // for i in 0..row { + // for j in 0..col { + // mat[(i, j)] = f(i, j).into(); + // } + // } + // mat + + let data = (0..row) + .into_par_iter() + .flat_map(|i| { + (0..col) + .into_par_iter() + .map(|j| f(i, j).into()) + .collect::>() + }) + .collect::>(); + matrix(data, row, col, Row) } /// Matrix to `Vec>` @@ -1365,7 +1456,7 @@ impl Matrix { /// /// # Description /// Return below elements of matrix to new matrix - /// + /// /// $$ /// \begin{pmatrix} /// \\ddots & & & & \\\\ @@ -1380,7 +1471,7 @@ impl Matrix { /// ``` /// extern crate peroxide; /// use peroxide::fuga::*; - /// + /// /// fn main() { /// let a = ml_matrix("1 2 3;4 5 6;7 8 9"); /// let b = ml_matrix("5 6;8 9"); @@ -1392,8 +1483,8 @@ impl Matrix { let row = end.0 - start.0 + 1; let col = end.1 - start.1 + 1; let mut result = matrix(vec![0f64; row * col], row, col, self.shape); - for i in 0 .. row { - for j in 0 .. col { + for i in 0..row { + for j in 0..col { result[(i, j)] = self[(start.0 + i, start.1 + j)]; } } @@ -1404,7 +1495,7 @@ impl Matrix { /// /// # Description /// Substitute below elements of matrix - /// + /// /// $$ /// \begin{pmatrix} /// \\ddots & & & & \\\\ @@ -1414,12 +1505,12 @@ impl Matrix { /// & & & & \\ddots /// \end{pmatrix} /// $$ - /// + /// /// # Examples /// ``` /// extern crate peroxide; /// use peroxide::fuga::*; - /// + /// /// fn main() { /// let mut a = ml_matrix("1 2 3;4 5 6;7 8 9"); /// let b = ml_matrix("1 2;3 4"); @@ -1431,8 +1522,8 @@ impl Matrix { pub fn subs_mat(&mut self, start: (usize, usize), end: (usize, usize), m: &Matrix) { let row = end.0 - start.0 + 1; let col = end.1 - start.1 + 1; - for i in 0 .. row { - for j in 0 .. col { + for i in 0..row { + for j in 0..col { self[(start.0 + i, start.1 + j)] = m[(i, j)]; } } @@ -2918,7 +3009,7 @@ impl SVD { pub fn s_mat(&self) -> Matrix { let mut mat = zeros(self.u.col, self.vt.row); - for i in 0 .. mat.row.min(mat.col) { + for i in 0..mat.row.min(mat.col) { mat[(i, i)] = self.s[i]; } mat @@ -2961,11 +3052,7 @@ impl SVD { } } - SVD { - s, - u, - vt, - } + SVD { s, u, vt } } } @@ -3151,7 +3238,7 @@ impl LinearAlgebra for Matrix { #[allow(non_snake_case)] fn qr(&self) -> QR { match () { - #[cfg(feature="O3")] + #[cfg(feature = "O3")] () => { let opt_dgeqrf = lapack_dgeqrf(self); match opt_dgeqrf { @@ -3159,10 +3246,7 @@ impl LinearAlgebra for Matrix { Some(dgeqrf) => { let q = dgeqrf.get_Q(); let r = dgeqrf.get_R(); - QR { - q, - r - } + QR { q, r } } } } @@ -3209,7 +3293,7 @@ impl LinearAlgebra for Matrix { /// ``` fn svd(&self) -> SVD { match () { - #[cfg(feature="O3")] + #[cfg(feature = "O3")] () => { let opt_dgesvd = lapack_dgesvd(self); match opt_dgesvd { @@ -3218,14 +3302,12 @@ impl LinearAlgebra for Matrix { SVD_STATUS::Diverge(i) => { panic!("Divergent occurs in SVD - {} iterations", i) } - SVD_STATUS::Success => { - SVD { - s: dgesvd.s, - u: dgesvd.u, - vt: dgesvd.vt, - } - } - } + SVD_STATUS::Success => SVD { + s: dgesvd.s, + u: dgesvd.u, + vt: dgesvd.vt, + }, + }, } } _ => { @@ -3465,7 +3547,9 @@ impl LinearAlgebra for Matrix { None => panic!("Singular matrix (opt_dgrf)"), Some(dgrf) => match dgrf.status { LAPACK_STATUS::NonSingular => lapack_dgetri(&dgrf).unwrap(), - LAPACK_STATUS::Singular => panic!("Singular matrix (LAPACK_STATUS Singular)"), + LAPACK_STATUS::Singular => { + panic!("Singular matrix (LAPACK_STATUS Singular)") + } }, } } @@ -3500,13 +3584,13 @@ impl LinearAlgebra for Matrix { /// ``` fn pseudo_inv(&self) -> Self { match () { - #[cfg(feature="O3")] + #[cfg(feature = "O3")] () => { let svd = self.svd(); let row = svd.vt.row; let col = svd.u.row; let mut sp = zeros(row, col); - for i in 0 .. row.min(col) { + for i in 0..row.min(col) { sp[(i, i)] = 1f64 / svd.s[i]; } svd.vt.t() * sp * svd.u.t() @@ -3620,9 +3704,9 @@ impl LinearAlgebra for Matrix { return false; } - for i in 0 .. self.row { - for j in i .. self.col { - if !nearly_eq(self[(i,j)], self[(j,i)]) { + for i in 0..self.row { + for j in i..self.col { + if !nearly_eq(self[(i, j)], self[(j, i)]) { return false; } } @@ -3722,7 +3806,6 @@ impl Div for Matrix { } } - impl ExpLogOps for Matrix { type Float = f64; fn exp(&self) -> Self { @@ -4248,7 +4331,7 @@ pub enum POSITIVE_STATUS { #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum UPLO { Upper, - Lower + Lower, } /// Temporary data structure from `dgetrf` @@ -4279,7 +4362,7 @@ pub struct DGESVD { pub struct DPOTRF { pub fact_mat: Matrix, pub uplo: UPLO, - pub status: POSITIVE_STATUS + pub status: POSITIVE_STATUS, } ///// Temporary data structure from `dgeev` @@ -4473,7 +4556,22 @@ pub fn lapack_dgesvd(mat: &Matrix) -> Option { // Workspace query unsafe { - dgesvd(jobu, jobvt, m, n, &mut A.data, lda, &mut s, &mut u, ldu, &mut vt, ldvt, &mut work, lwork, &mut info); + dgesvd( + jobu, + jobvt, + m, + n, + &mut A.data, + lda, + &mut s, + &mut u, + ldu, + &mut vt, + ldvt, + &mut work, + lwork, + &mut info, + ); } let optimal_lwork = work[0] as usize; @@ -4532,35 +4630,23 @@ pub fn lapack_dpotrf(mat: &Matrix, UPLO: UPLO) -> Option { let mut info = 0i32; let uplo = match UPLO { UPLO::Upper => b'U', - UPLO::Lower => b'L' + UPLO::Lower => b'L', }; - unsafe { - dpotrf( - uplo, - N, - &mut A.data, - lda, - &mut info, - ) - } + unsafe { dpotrf(uplo, N, &mut A.data, lda, &mut info) } if info == 0 { - Some( - DPOTRF { - fact_mat: matrix(A.data, mat.row, mat.col, Col), - uplo: UPLO, - status: POSITIVE_STATUS::Success - } - ) + Some(DPOTRF { + fact_mat: matrix(A.data, mat.row, mat.col, Col), + uplo: UPLO, + status: POSITIVE_STATUS::Success, + }) } else if info > 0 { - Some( - DPOTRF { - fact_mat: matrix(A.data, mat.row, mat.col, Col), - uplo: UPLO, - status: POSITIVE_STATUS::Failed(info) - } - ) + Some(DPOTRF { + fact_mat: matrix(A.data, mat.row, mat.col, Col), + uplo: UPLO, + status: POSITIVE_STATUS::Failed(info), + }) } else { None } @@ -4674,8 +4760,8 @@ impl DPOTRF { let mat = &self.fact_mat; let n = mat.col; let mut result = matrix(vec![0f64; n.pow(2)], n, n, mat.shape); - for i in 0 .. n { - for j in i .. n { + for i in 0..n { + for j in i..n { result[(i, j)] = mat[(i, j)]; } } @@ -4691,8 +4777,8 @@ impl DPOTRF { let n = mat.col; let mut result = matrix(vec![0f64; n.pow(2)], n, n, mat.shape); - for i in 0 .. n { - for j in 0 .. i+1 { + for i in 0..n { + for j in 0..i + 1 { result[(i, j)] = mat[(i, j)]; } } diff --git a/src/structure/vector.rs b/src/structure/vector.rs index 58c4315e..0300bc78 100644 --- a/src/structure/vector.rs +++ b/src/structure/vector.rs @@ -267,7 +267,7 @@ extern crate blas; #[cfg(feature = "O3")] use blas::{daxpy, ddot, dnrm2, idamax}; -use rayon::prelude::*; +use crate::rayon::prelude::*; use crate::structure::matrix::{matrix, Matrix, Row}; use crate::traits::{ From 1c4e0b305dfdf793af90b9e4716d733a615f8029 Mon Sep 17 00:00:00 2001 From: Soumya Date: Sun, 29 Sep 2024 17:18:46 +0200 Subject: [PATCH 05/17] Modified further traits for Matrix - Added rayon support for Matrix for traits like LinearOps, Normed etc --- src/structure/matrix.rs | 198 +++++++++++++++++++++++++++++----------- 1 file changed, 147 insertions(+), 51 deletions(-) diff --git a/src/structure/matrix.rs b/src/structure/matrix.rs index 4fa1f00f..de290215 100644 --- a/src/structure/matrix.rs +++ b/src/structure/matrix.rs @@ -1435,21 +1435,49 @@ impl Matrix { /// /// To send `Matrix` to `inline-python` pub fn to_vec(&self) -> Vec> { - let mut result = vec![vec![0f64; self.col]; self.row]; - for i in 0..self.row { - result[i] = self.row(i); - } - result + // let mut result = vec![vec![0f64; self.col]; self.row]; + // for i in 0..self.row { + // result[i] = self.row(i); + // } + // result + + (0..self.row) + .into_par_iter() + .map(|i| self.row(i)) + .collect::>>() } + /// To diagonal components + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// + /// fn main() { + /// let a = matrix!(1;4;1, 2, 2, Row); + /// assert_eq!(a.to_diag(), matrix(c!(1,0,0,4), 2, 2, Row)); + /// } pub fn to_diag(&self) -> Matrix { assert_eq!(self.row, self.col, "Should be square matrix"); - let mut result = matrix(vec![0f64; self.row * self.col], self.row, self.col, Row); - let diag = self.diag(); - for i in 0..self.row { - result[(i, i)] = diag[i]; - } - result + // let mut result = matrix(vec![0f64; self.row * self.col], self.row, self.col, Row); + // let diag = self.diag(); + // for i in 0..self.row { + // result[(i, i)] = diag[i]; + // } + // result + + let data = (0..self.row) + .into_par_iter() + .flat_map(|i| { + (0..self.col) + .into_par_iter() + .map(|j| if i == j { self.diag()[i] } else { 0_f64 }) + .collect::>() + }) + .collect::>(); + matrix(data, self.row, self.col, Row) } /// Submatrix @@ -1482,13 +1510,24 @@ impl Matrix { pub fn submat(&self, start: (usize, usize), end: (usize, usize)) -> Matrix { let row = end.0 - start.0 + 1; let col = end.1 - start.1 + 1; - let mut result = matrix(vec![0f64; row * col], row, col, self.shape); - for i in 0..row { - for j in 0..col { - result[(i, j)] = self[(start.0 + i, start.1 + j)]; - } - } - result + // let mut result = matrix(vec![0f64; row * col], row, col, self.shape); + // for i in 0..row { + // for j in 0..col { + // result[(i, j)] = self[(start.0 + i, start.1 + j)]; + // } + // } + // result + + let data = (0..row) + .into_par_iter() + .flat_map(|i| { + (0..col) + .into_par_iter() + .map(|j| self[(start.0 + i, start.1 + j)]) + .collect::>() + }) + .collect::>(); + matrix(data, row, col, self.shape) } /// Substitute matrix to specific position @@ -1576,12 +1615,26 @@ impl Vector for Matrix { matrix(y, self.row, self.col, self.shape) } _ => { + // let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); + // for i in 0..self.row { + // for j in 0..self.col { + // result[(i, j)] += other[(i, j)]; + // } + // } + // result + let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); - for i in 0..self.row { - for j in 0..self.col { - result[(i, j)] += other[(i, j)]; - } - } + result + .data + .par_iter_mut() + .enumerate() + .for_each(|(idx, value)| { + let i = idx / self.col; + let j = idx % self.col; + + *value += other[(i, j)]; + }); + result } } @@ -1606,12 +1659,25 @@ impl Vector for Matrix { matrix(y, self.row, self.col, self.shape) } _ => { + // let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); + // for i in 0..self.row { + // for j in 0..self.col { + // result[(i, j)] -= other[(i, j)]; + // } + // } + // result + let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); - for i in 0..self.row { - for j in 0..self.col { - result[(i, j)] -= other[(i, j)]; - } - } + result + .data + .par_iter_mut() + .enumerate() + .for_each(|(idx, val)| { + let i = idx / self.col; + let j = idx % self.col; + *val -= other[(i, j)]; + }); + result } } @@ -1644,21 +1710,39 @@ impl Normed for Matrix { fn norm(&self, kind: Norm) -> f64 { match kind { Norm::F => { - let mut s = 0f64; - for i in 0..self.data.len() { - s += self.data[i].powi(2); - } + // let mut s = 0f64; + // for i in 0..self.data.len() { + // s += self.data[i].powi(2); + // } + // s.sqrt() + let s = self + .data + .clone() + .into_par_iter() + .fold(|| 0_f64, |acc, el| acc + el.powi(2)) + .sum::(); s.sqrt() } Norm::Lpq(p, q) => { - let mut s = 0f64; - for j in 0..self.col { - let mut s_row = 0f64; - for i in 0..self.row { - s_row += self[(i, j)].powi(p as i32); - } - s += s_row.powf(q / p); - } + // let mut s = 0f64; + // for j in 0..self.col { + // let mut s_row = 0f64; + // for i in 0..self.row { + // s_row += self[(i, j)].powi(p as i32); + // } + // s += s_row.powf(q / p); + // } + // s.powf(1f64 / q) + let s = (0..self.col) + .into_par_iter() + .map(|j| { + let s_row = (0..self.row) + .into_iter() + .map(|i| self[(i, j)].powi(p as i32)) + .sum::(); + s_row.powf(q / p) + }) + .sum::(); s.powf(1f64 / q) } Norm::L1 => { @@ -1667,7 +1751,8 @@ impl Normed for Matrix { Row => self.change_shape().norm(Norm::L1), Col => { for c in 0..self.col { - let s = self.col(c).iter().sum(); + // let s = self.col(c).iter().sum(); + let s = self.col(c).par_iter().sum(); if s > m { m = s; } @@ -1682,7 +1767,8 @@ impl Normed for Matrix { Col => self.change_shape().norm(Norm::LInf), Row => { for r in 0..self.row { - let s = self.row(r).iter().sum(); + // let s = self.row(r).iter().sum(); + let s = self.row(r).par_iter().sum(); if s > m { m = s; } @@ -1779,16 +1865,26 @@ impl MatrixProduct for Matrix { assert_eq!(self.row, other.row); assert_eq!(self.col, other.col); - let r = self.row; - let c = self.col; + // let r = self.row; + // let c = self.col; + // let mut m = matrix(vec![0f64; r * c], r, c, self.shape); + // for i in 0..r { + // for j in 0..c { + // m[(i, j)] = self[(i, j)] * other[(i, j)] + // } + // } + // m - let mut m = matrix(vec![0f64; r * c], r, c, self.shape); - for i in 0..r { - for j in 0..c { - m[(i, j)] = self[(i, j)] * other[(i, j)] - } - } - m + let m = (0..self.row) + .into_par_iter() + .flat_map(|i| { + (0..self.col) + .into_par_iter() + .map(|j| self[(i, j)] * other[(i, j)]) + .collect::>() + }) + .collect::>(); + matrix(m, self.row, self.col, self.shape) } } From 9d3a64931e8db78c34545d705dbd5692335d0d9f Mon Sep 17 00:00:00 2001 From: Soumya Date: Sun, 29 Sep 2024 19:13:24 +0200 Subject: [PATCH 06/17] Modified FPMatrix to use Rayon - Used rayon to parallelize Functional Programming for Matrix --- src/structure/matrix.rs | 197 ++++++++++++++++++++++++++++------------ src/traits/fp.rs | 12 +-- 2 files changed, 145 insertions(+), 64 deletions(-) diff --git a/src/structure/matrix.rs b/src/structure/matrix.rs index de290215..b53212ff 100644 --- a/src/structure/matrix.rs +++ b/src/structure/matrix.rs @@ -1952,12 +1952,24 @@ impl Add for Matrix { matrix(y, self.row, self.col, self.shape) } _ => { + // let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); + // for i in 0..self.row { + // for j in 0..self.col { + // result[(i, j)] += other[(i, j)]; + // } + // } + // result let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); - for i in 0..self.row { - for j in 0..self.col { - result[(i, j)] += other[(i, j)]; - } - } + result + .data + .par_iter_mut() + .enumerate() + .for_each(|(idx, value)| { + let i = idx / self.col; + let j = idx % self.col; + + *value += other[(i, j)]; + }); result } } @@ -1987,7 +1999,7 @@ impl<'a, 'b> Add<&'b Matrix> for &'a Matrix { /// ``` impl Add for Matrix where - T: Into + Copy, + T: Into + Copy + Send + Sync, { type Output = Self; fn add(self, other: T) -> Self { @@ -2010,7 +2022,7 @@ where /// Element-wise addition between &Matrix & f64 impl<'a, T> Add for &'a Matrix where - T: Into + Copy, + T: Into + Copy + Send + Sync, { type Output = Matrix; fn add(self, other: T) -> Self::Output { @@ -2149,7 +2161,11 @@ impl Neg for Matrix { matrix(y, self.row, self.col, self.shape) } _ => matrix( - self.data.into_iter().map(|x: f64| -x).collect::>(), + // self.data.into_iter().map(|x: f64| -x).collect::>(), + self.data + .into_par_iter() + .map(|x: f64| -x) + .collect::>(), self.row, self.col, self.shape, @@ -2175,9 +2191,14 @@ impl<'a> Neg for &'a Matrix { matrix(y, self.row, self.col, self.shape) } _ => matrix( + // self.data + // .clone() + // .into_iter() + // .map(|x: f64| -x) + // .collect::>(), self.data .clone() - .into_iter() + .into_par_iter() .map(|x: f64| -x) .collect::>(), self.row, @@ -2224,12 +2245,24 @@ impl Sub for Matrix { matrix(y, self.row, self.col, self.shape) } _ => { + // let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); + // for i in 0..self.row { + // for j in 0..self.col { + // result[(i, j)] -= other[(i, j)]; + // } + // } + // result let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); - for i in 0..self.row { - for j in 0..self.col { - result[(i, j)] -= other[(i, j)]; - } - } + result + .data + .par_iter_mut() + .enumerate() + .for_each(|(idx, value)| { + let i = idx / self.col; + let j = idx % self.col; + + *value -= other[(i, j)]; + }); result } } @@ -2247,7 +2280,7 @@ impl<'a, 'b> Sub<&'b Matrix> for &'a Matrix { /// Subtraction between Matrix & f64 impl Sub for Matrix where - T: Into + Copy, + T: Into + Copy + Send + Sync, { type Output = Self; @@ -2271,7 +2304,7 @@ where /// Subtraction between &Matrix & f64 impl<'a, T> Sub for &'a Matrix where - T: Into + Copy, + T: Into + Copy + Send + Sync, { type Output = Matrix; @@ -2757,16 +2790,23 @@ impl FPMatrix for Matrix { let new_data = self .data .clone() - .into_iter() + // .into_iter() + .into_par_iter() .take(n * self.col) .collect::>(); matrix(new_data, n, self.col, Row) } Col => { - let mut temp_data: Vec = Vec::new(); - for i in 0..n { - temp_data.extend(self.row(i)); - } + // let mut temp_data: Vec = Vec::new(); + // for i in 0..n { + // temp_data.extend(self.row(i)); + // } + // matrix(temp_data, n, self.col, Row) + + let temp_data = (0..n) + .into_par_iter() + .flat_map(|i| self.row(i)) + .collect::>(); matrix(temp_data, n, self.col, Row) } } @@ -2781,16 +2821,23 @@ impl FPMatrix for Matrix { let new_data = self .data .clone() - .into_iter() + // .into_iter() + .into_par_iter() .take(n * self.row) .collect::>(); matrix(new_data, self.row, n, Col) } Row => { - let mut temp_data: Vec = Vec::new(); - for i in 0..n { - temp_data.extend(self.col(i)); - } + // let mut temp_data: Vec = Vec::new(); + // for i in 0..n { + // temp_data.extend(self.col(i)); + // } + // matrix(temp_data, self.row, n, Col) + + let temp_data = (0..n) + .into_par_iter() + .flat_map(|i| self.col(i)) + .collect::>(); matrix(temp_data, self.row, n, Col) } } @@ -2799,28 +2846,43 @@ impl FPMatrix for Matrix { fn skip_row(&self, n: usize) -> Self { assert!(n < self.row, "Skip range is larger than row of matrix"); - let mut temp_data: Vec = Vec::new(); - for i in n..self.row { - temp_data.extend(self.row(i)); - } + // let mut temp_data: Vec = Vec::new(); + // for i in n..self.row { + // temp_data.extend(self.row(i)); + // } + // matrix(temp_data, self.row - n, self.col, Row) + + let temp_data = (n..self.row) + .into_par_iter() + .flat_map(|i| self.row(i)) + .collect::>(); matrix(temp_data, self.row - n, self.col, Row) } fn skip_col(&self, n: usize) -> Self { assert!(n < self.col, "Skip range is larger than col of matrix"); - let mut temp_data: Vec = Vec::new(); - for i in n..self.col { - temp_data.extend(self.col(i)); - } + // let mut temp_data: Vec = Vec::new(); + // for i in n..self.col { + // temp_data.extend(self.col(i)); + // } + // matrix(temp_data, self.row, self.col - n, Col) + + let temp_data = (n..self.col) + .into_par_iter() + .flat_map(|i| self.col(i)) + .collect::>(); matrix(temp_data, self.row, self.col - n, Col) } fn fmap(&self, f: F) -> Matrix where - F: Fn(f64) -> f64, + F: Fn(f64) -> f64 + Send + Sync, { - let result = self.data.iter().map(|x| f(*x)).collect::>(); + // let result = self.data.iter().map(|x| f(*x)).collect::>(); + // matrix(result, self.row, self.col, self.shape) + + let result = self.data.par_iter().map(|x| f(*x)).collect::>(); matrix(result, self.row, self.col, self.shape) } @@ -2842,11 +2904,9 @@ impl FPMatrix for Matrix { F: Fn(Vec) -> Vec, { let mut result = matrix(vec![0f64; self.row * self.col], self.row, self.col, Col); - for i in 0..self.col { result.subs_col(i, &f(self.col(i))); } - result } @@ -2868,11 +2928,9 @@ impl FPMatrix for Matrix { F: Fn(Vec) -> Vec, { let mut result = matrix(vec![0f64; self.row * self.col], self.row, self.col, Row); - for i in 0..self.row { result.subs_row(i, &f(self.row(i))); } - result } @@ -2908,25 +2966,38 @@ impl FPMatrix for Matrix { fn reduce(&self, init: T, f: F) -> f64 where - F: Fn(f64, f64) -> f64, - T: Into, + F: Fn(f64, f64) -> f64 + Send + Sync, + T: Into + Send + Sync + Copy + Clone, { - self.data.iter().fold(init.into(), |x, y| f(x, *y)) + // self.data.iter().fold(init.into(), |x, y| f(x, *y)) + self.data + .clone() + .into_par_iter() + .fold(|| init.into(), |acc, y| f(acc, y)) + .sum() } fn zip_with(&self, f: F, other: &Matrix) -> Self where - F: Fn(f64, f64) -> f64, + F: Fn(f64, f64) -> f64 + Send + Sync, { assert_eq!(self.data.len(), other.data.len()); let mut a = other.clone(); if self.shape != other.shape { a = a.change_shape(); } + // let result = self + // .data + // .iter() + // .zip(a.data.iter()) + // .map(|(x, y)| f(*x, *y)) + // .collect::>(); + // matrix(result, self.row, self.col, self.shape) + let result = self .data - .iter() - .zip(a.data.iter()) + .par_iter() + .zip(a.data.par_iter()) .map(|(x, y)| f(*x, *y)) .collect::>(); matrix(result, self.row, self.col, self.shape) @@ -2934,24 +3005,34 @@ impl FPMatrix for Matrix { fn col_reduce(&self, f: F) -> Vec where - F: Fn(Vec) -> f64, + F: Fn(Vec) -> f64 + Send + Sync, { - let mut v = vec![0f64; self.col]; - for i in 0..self.col { - v[i] = f(self.col(i)); - } - v + // let mut v = vec![0f64; self.col]; + // for i in 0..self.col { + // v[i] = f(self.col(i)); + // } + // v + + (0..self.col) + .into_par_iter() + .map(|i| f(self.col(i))) + .collect::>() } fn row_reduce(&self, f: F) -> Vec where - F: Fn(Vec) -> f64, + F: Fn(Vec) -> f64 + Send + Sync, { - let mut v = vec![0f64; self.row]; - for i in 0..self.row { - v[i] = f(self.row(i)); - } - v + // let mut v = vec![0f64; self.row]; + // for i in 0..self.row { + // v[i] = f(self.row(i)); + // } + // v + + (0..self.row) + .into_par_iter() + .map(|i| f(self.row(i))) + .collect::>() } } diff --git a/src/traits/fp.rs b/src/traits/fp.rs index 9204df38..1106f452 100644 --- a/src/traits/fp.rs +++ b/src/traits/fp.rs @@ -31,7 +31,7 @@ pub trait FPMatrix { fn skip_col(&self, n: usize) -> Matrix; fn fmap(&self, f: F) -> Matrix where - F: Fn(f64) -> f64; + F: Fn(f64) -> f64 + Send + Sync; fn col_map(&self, f: F) -> Matrix where F: Fn(Vec) -> Vec; @@ -46,15 +46,15 @@ pub trait FPMatrix { F: Fn(Vec) -> Vec; fn reduce(&self, init: T, f: F) -> f64 where - F: Fn(f64, f64) -> f64, - T: Into; + F: Fn(f64, f64) -> f64 + Send + Sync, + T: Into + Copy + Clone + Send + Sync; fn zip_with(&self, f: F, other: &Matrix) -> Matrix where - F: Fn(f64, f64) -> f64; + F: Fn(f64, f64) -> f64 + Send + Sync; fn col_reduce(&self, f: F) -> Vec where - F: Fn(Vec) -> f64; + F: Fn(Vec) -> f64 + Send + Sync; fn row_reduce(&self, f: F) -> Vec where - F: Fn(Vec) -> f64; + F: Fn(Vec) -> f64 + Send + Sync; } From bc070a7339d0c27cf2b8f70c4db477435e6239cd Mon Sep 17 00:00:00 2001 From: Soumya Date: Mon, 30 Sep 2024 10:57:23 +0200 Subject: [PATCH 07/17] Parallelized vec and matrix using Rayon --- src/structure/matrix.rs | 46 +++++++------ src/structure/vector.rs | 143 +++++++++++++++++++++++++++++++++------- 2 files changed, 144 insertions(+), 45 deletions(-) diff --git a/src/structure/matrix.rs b/src/structure/matrix.rs index b53212ff..bcb8bf49 100644 --- a/src/structure/matrix.rs +++ b/src/structure/matrix.rs @@ -789,13 +789,14 @@ where /// } /// ``` pub fn ml_matrix(s: &str) -> Matrix where { - // let str_rows: Vec<&str> = s.split(';').collect(); - // let r = str_rows.len(); - // let str_data = str_rows - // .into_iter() - // .map(|x| x.trim().split(' ').collect::>()) - // .collect::>>(); - // let c = str_data[0].len(); + let str_rows: Vec<&str> = s.split(';').collect(); + let r = str_rows.len(); + let str_data = str_rows + .into_iter() + .map(|x| x.trim().split(' ').collect::>()) + .collect::>>(); + let c = str_data[0].len(); + // let data = str_data // .into_iter() // .flat_map(|t| { @@ -806,14 +807,6 @@ pub fn ml_matrix(s: &str) -> Matrix where { // .collect::>(); // matrix(data, r, c, Row) - let str_rows: Vec<&str> = s.split(';').collect(); - let r = str_rows.len(); - let str_data = str_rows - .into_iter() - .map(|x| x.trim().split(' ').collect::>()) - .collect::>>(); - let c = str_data[0].len(); - let data = str_data .into_par_iter() .flat_map(|t| { @@ -2970,6 +2963,7 @@ impl FPMatrix for Matrix { T: Into + Send + Sync + Copy + Clone, { // self.data.iter().fold(init.into(), |x, y| f(x, *y)) + self.data .clone() .into_par_iter() @@ -3061,11 +3055,23 @@ pub trait LinearAlgebra { } pub fn diag(n: usize) -> Matrix { - let mut v: Vec = vec![0f64; n * n]; - for i in 0..n { - let idx = i * (n + 1); - v[idx] = 1f64; - } + // let mut v: Vec = vec![0f64; n * n]; + // for i in 0..n { + // let idx = i * (n + 1); + // v[idx] = 1f64; + // } + // matrix(v, n, n, Row) + + let v = (0..n * n) + .into_par_iter() + .map(|i| { + if i % (n + 1) == 0 { + 1_f64 // Set diagonal elements to 1 + } else { + 0_f64 // All other elements are 0 + } + }) + .collect::>(); matrix(v, n, n, Row) } diff --git a/src/structure/vector.rs b/src/structure/vector.rs index 0300bc78..ad00d09d 100644 --- a/src/structure/vector.rs +++ b/src/structure/vector.rs @@ -327,6 +327,7 @@ impl FPVector for Vec { T: Into + Send + Sync + Copy, { self.iter().fold(init.into(), |x, &y| f(x, y)) + // self.par_iter() // .cloned() // .fold(|| init.into(), |x, y| f(x, y)) @@ -539,15 +540,35 @@ impl Algorithm for Vec { let l = self.len(); let idx = (1..(l + 1)).collect::>(); + // let mut vec_tup = self + // .clone() + // .into_iter() + // .zip(idx.clone()) + // .collect::>(); + // vec_tup.sort_by(|x, y| x.0.partial_cmp(&y.0).unwrap().reverse()); + // let indices = vec_tup.into_iter().map(|(_, y)| y).collect::>(); + // idx.into_iter() + // .map(|x| indices.clone().into_iter().position(|t| t == x).unwrap()) + // .collect::>() + let mut vec_tup = self .clone() - .into_iter() + .into_par_iter() .zip(idx.clone()) .collect::>(); vec_tup.sort_by(|x, y| x.0.partial_cmp(&y.0).unwrap().reverse()); - let indices = vec_tup.into_iter().map(|(_, y)| y).collect::>(); - idx.into_iter() - .map(|x| indices.clone().into_iter().position(|t| t == x).unwrap()) + let indices = vec_tup + .into_par_iter() + .map(|(_, y)| y) + .collect::>(); + idx.into_par_iter() + .map(|x| { + indices + .clone() + .into_par_iter() + .position_any(|t| t == x) // Note: position() is deprecated by rayon + .unwrap() + }) .collect::>() } @@ -613,15 +634,41 @@ impl Algorithm for Vec { } #[cfg(not(feature = "O3"))] { - self.into_iter() + // self.into_iter() + // .enumerate() + // .fold((0usize, f64::MIN), |acc, (ics, &val)| { + // if acc.1 < val { + // (ics, val) + // } else { + // acc + // } + // }) + // .0 + + self.into_par_iter() .enumerate() - .fold((0usize, f64::MIN), |acc, (ics, &val)| { - if acc.1 < val { - (ics, val) - } else { - acc - } - }) + .fold( + || (0usize, f64::MIN), + |acc, (ics, &val)| { + if acc.1 < val { + (ics, val) + } else { + acc + } + }, + ) + .reduce( + // Note: need reduce and can nto simply do .max() because rayon::..max() requires Ord, which is stricter than PartialOrd + // Hence combine the results from the parallel fold + || (0usize, f64::MIN), // Identity element + |acc1, acc2| { + if acc1.1 < acc2.1 { + acc2 // Return the pair with the larger value + } else { + acc1 + } + }, + ) .0 } } @@ -642,15 +689,39 @@ impl Algorithm for Vec { /// assert_eq!(v2.arg_min(),4); /// } fn arg_min(&self) -> usize { - self.iter() + // self.iter() + // .enumerate() + // .fold((0usize, f64::MAX), |acc, (ics, &val)| { + // if acc.1 > val { + // (ics, val) + // } else { + // acc + // } + // }) + // .0 + + self.into_par_iter() .enumerate() - .fold((0usize, f64::MAX), |acc, (ics, &val)| { - if acc.1 > val { - (ics, val) - } else { - acc - } - }) + .fold( + || (0usize, f64::MAX), + |acc, (ics, &val)| { + if acc.1 > val { + (ics, val) + } else { + acc + } + }, + ) + .reduce( + || (0usize, f64::MAX), + |acc1, acc2| { + if acc1.1 > acc2.1 { + acc2 // Return the pair with the smaller value + } else { + acc1 + } + }, + ) .0 } @@ -666,14 +737,28 @@ impl Algorithm for Vec { } #[cfg(not(feature = "O3"))] { - self.into_iter() - .fold(f64::MIN, |acc, &val| if acc < val { val } else { acc }) + // self.into_iter() + // .fold(f64::MIN, |acc, &val| if acc < val { val } else { acc }) + + self.into_par_iter() + .fold(|| f64::MIN, |acc, &val| if acc < val { val } else { acc }) + .reduce( + || f64::MIN, + |acc1, acc2| if acc1 < acc2 { acc2 } else { acc1 }, + ) } } fn min(&self) -> f64 { - self.iter() - .fold(f64::MAX, |acc, &val| if acc > val { val } else { acc }) + // self.iter() + // .fold(f64::MAX, |acc, &val| if acc > val { val } else { acc }) + + self.into_par_iter() + .fold(|| f64::MAX, |acc, &val| if acc > val { val } else { acc }) + .reduce( + || f64::MAX, + |acc1, acc2| if acc1 > acc2 { acc2 } else { acc1 }, + ) } fn swap_with_perm(&mut self, p: &Vec<(usize, usize)>) { @@ -718,6 +803,7 @@ impl Normed for Vec { #[cfg(not(feature = "O3"))] { // self.iter().map(|x| x.powi(2)).sum::().sqrt() + self.par_iter() .map(|x| x.powi(2)) .sum::() @@ -731,12 +817,17 @@ impl Normed for Vec { p ); // self.iter().map(|x| x.powf(p)).sum::().powf(1f64 / p) + self.par_iter() .map(|x| x.powf(p)) .sum::() .powf(1_f64 / p) } - Norm::LInf => self.iter().fold(0f64, |x, y| x.max(y.abs())), + // Norm::LInf => self.iter().fold(0f64, |x, y| x.max(y.abs())), + Norm::LInf => self + .par_iter() + .fold(|| 0f64, |x, y| x.max(y.abs())) + .reduce(|| 0f64, |acc1, acc2| acc1.max(acc2.abs())), Norm::F => unimplemented!(), Norm::Lpq(_, _) => unimplemented!(), } @@ -766,6 +857,7 @@ impl InnerProduct for Vec { // self.iter() // .zip(rhs.iter()) // .fold(0f64, |x, (y1, y2)| x + y1 * y2) + self.par_iter() .zip(rhs.into_par_iter()) .fold(|| 0_f64, |x, (y1, y2)| x + y1 * y2) @@ -802,6 +894,7 @@ impl VectorProduct for Vec { // v[1] = self[2] * other[0] - self[0] * other[2]; // v[2] = self[0] * other[1] - self[1] * other[0]; // v + let v = (0..3) .into_par_iter() .map(|index| { From 60037918e1ca06142cb6692b557e701f604e3683 Mon Sep 17 00:00:00 2001 From: Soumya Date: Mon, 30 Sep 2024 21:23:07 +0200 Subject: [PATCH 08/17] [Feature] Added Parallel trait - Added parallel version of required traits for vec and matrix - Adjusted commented sections of the code --- src/numerical/integral.rs | 4 +- src/prelude/simpler.rs | 2 +- src/statistics/dist.rs | 2 +- src/statistics/rand.rs | 4 +- src/structure/matrix.rs | 931 ++++++++++++++++++++++++++++---------- src/structure/vector.rs | 593 ++++++++++++++++++------ src/traits/fp.rs | 74 ++- src/traits/general.rs | 13 + src/traits/math.rs | 32 ++ src/traits/mod.rs | 2 +- src/traits/mutable.rs | 15 +- src/traits/sugar.rs | 4 +- 12 files changed, 1262 insertions(+), 414 deletions(-) diff --git a/src/numerical/integral.rs b/src/numerical/integral.rs index a04d745f..d7650f3c 100644 --- a/src/numerical/integral.rs +++ b/src/numerical/integral.rs @@ -161,7 +161,7 @@ impl Integral { /// * `G30K61R` pub fn integrate(f: F, (a, b): (f64, f64), method: Integral) -> f64 where - F: Fn(f64) -> f64 + Copy + Send + Sync, + F: Fn(f64) -> f64 + Copy, { match method { Integral::GaussLegendre(n) => gauss_legendre_quadrature(f, n, (a, b)), @@ -173,7 +173,7 @@ where /// Newton Cotes Quadrature pub fn newton_cotes_quadrature(f: F, n: usize, (a, b): (f64, f64)) -> f64 where - F: Fn(f64) -> f64 + Send + Sync, + F: Fn(f64) -> f64, { let h = (b - a) / (n as f64); let node_x = seq(a, b, h); diff --git a/src/prelude/simpler.rs b/src/prelude/simpler.rs index 5b786b6a..c2c0c7d7 100644 --- a/src/prelude/simpler.rs +++ b/src/prelude/simpler.rs @@ -23,7 +23,7 @@ pub trait SimpleNorm: Normed { } /// Simple integrate -pub fn integrate f64 + Copy + Send + Sync>(f: F, (a, b): (f64, f64)) -> f64 { +pub fn integrate f64 + Copy>(f: F, (a, b): (f64, f64)) -> f64 { integral::integrate(f, (a, b), G7K15R(1e-4, 20)) } diff --git a/src/statistics/dist.rs b/src/statistics/dist.rs index d3f1f6cb..ee150aca 100644 --- a/src/statistics/dist.rs +++ b/src/statistics/dist.rs @@ -373,7 +373,7 @@ impl WeightedUniform { /// ``` pub fn from_max_pool_1d(f: F, (a, b): (f64, f64), n: usize, eps: f64) -> Result where - F: Fn(f64) -> f64 + Copy + Send + Sync, + F: Fn(f64) -> f64 + Copy, { // Find non-zero intervals let mut a = a; diff --git a/src/statistics/rand.rs b/src/statistics/rand.rs index 7b36bc1d..1e577fbe 100644 --- a/src/statistics/rand.rs +++ b/src/statistics/rand.rs @@ -467,7 +467,7 @@ pub fn ziggurat(rng: &mut ThreadRng, sigma: f64) -> f64 { /// } pub fn prs(f: F, n: usize, (a, b): (f64, f64), m: usize, eps: f64) -> anyhow::Result> where - F: Fn(f64) -> f64 + Copy + Send + Sync, + F: Fn(f64) -> f64 + Copy, { let mut rng = thread_rng(); @@ -538,7 +538,7 @@ pub fn prs_with_rng( rng: &mut R, ) -> anyhow::Result> where - F: Fn(f64) -> f64 + Copy + Send + Sync, + F: Fn(f64) -> f64 + Copy, { let mut result = vec![0f64; n]; diff --git a/src/structure/matrix.rs b/src/structure/matrix.rs index bcb8bf49..af3d98e8 100644 --- a/src/structure/matrix.rs +++ b/src/structure/matrix.rs @@ -614,6 +614,8 @@ use lapack::{dgecon, dgeqrf, dgesvd, dgetrf, dgetri, dgetrs, dorgqr, dpotrf}; use serde::{Deserialize, Serialize}; use crate::rayon::prelude::*; +use crate::traits::fp::ParallelFPMatrix; +use crate::traits::math::{ParallelMatrixProduct, ParallelNormed, ParallelVector}; pub use self::Shape::{Col, Row}; use crate::numerical::eigen::{eigen, EigenMethod}; @@ -712,6 +714,31 @@ pub struct Matrix { /// } /// ``` pub fn matrix(v: Vec, r: usize, c: usize, shape: Shape) -> Matrix +where + T: Into, +{ + Matrix { + data: v.into_iter().map(|t| t.into()).collect::>(), + row: r, + col: c, + shape, + } +} + +/// R-like matrix constructor in parallel +/// +/// # Examples +/// ``` +/// #[macro_use] +/// extern crate peroxide; +/// use peroxide::fuga::*; +/// +/// fn main() { +/// let a = par_matrix(c!(1,2,3,4), 2, 2, Row); +/// a.print(); // Print matrix +/// } +/// ``` +pub fn par_matrix(v: Vec, r: usize, c: usize, shape: Shape) -> Matrix where T: Into + Send + Sync, { @@ -726,11 +753,19 @@ where /// R-like matrix constructor (Explicit ver.) pub fn r_matrix(v: Vec, r: usize, c: usize, shape: Shape) -> Matrix where - T: Into + Send + Sync, + T: Into, { matrix(v, r, c, shape) } +/// R-like matrix constructor (Explicit ver.) in parallel +pub fn par_r_matrix(v: Vec, r: usize, c: usize, shape: Shape) -> Matrix +where + T: Into + Send + Sync, +{ + par_matrix(v, r, c, shape) +} + /// Python-like matrix constructor /// /// # Examples @@ -747,19 +782,38 @@ where /// ``` pub fn py_matrix(v: Vec>) -> Matrix where - T: Into + Copy + Send + Sync, + T: Into + Copy, { - // let r = v.len(); - // let c = v[0].len(); - // let mut data = vec![0f64; r * c]; - // for i in 0..r { - // for j in 0..c { - // let idx = i * c + j; - // data[idx] = v[i][j].into(); - // } - // } - // matrix(data, r, c, Row) + let r = v.len(); + let c = v[0].len(); + let mut data = vec![0f64; r * c]; + for i in 0..r { + for j in 0..c { + let idx = i * c + j; + data[idx] = v[i][j].into(); + } + } + matrix(data, r, c, Row) +} +/// Python-like matrix constructor in parallel +/// +/// # Examples +/// ``` +/// #[macro_use] +/// extern crate peroxide; +/// use peroxide::fuga::*; +/// +/// fn main() { +/// let a = par_py_matrix(vec![c!(1,2), c!(3,4)]); +/// let b = matrix(c!(1,2,3,4), 2, 2, Row); +/// assert_eq!(a, b); +/// } +/// ``` +pub fn par_py_matrix(v: Vec>) -> Matrix +where + T: Into + Copy + Send + Sync, +{ let r = v.len(); let c = v[0].len(); let data = (0..r) @@ -771,7 +825,7 @@ where .collect::>() }) .collect::>(); - matrix(data, r, c, Row) + par_matrix(data, r, c, Row) } /// Matlab-like matrix constructor @@ -797,15 +851,40 @@ pub fn ml_matrix(s: &str) -> Matrix where { .collect::>>(); let c = str_data[0].len(); - // let data = str_data - // .into_iter() - // .flat_map(|t| { - // t.into_iter() - // .map(|x| x.parse::().unwrap()) - // .collect::>() - // }) - // .collect::>(); - // matrix(data, r, c, Row) + let data = str_data + .into_iter() + .flat_map(|t| { + t.into_iter() + .map(|x| x.parse::().unwrap()) + .collect::>() + }) + .collect::>(); + + matrix(data, r, c, Row) +} + +/// Matlab-like matrix constructor in parallel +/// +/// # Examples +/// ``` +/// #[macro_use] +/// extern crate peroxide; +/// use peroxide::fuga::*; +/// +/// fn main() { +/// let a = par_ml_matrix("1 2; 3 4"); +/// let b = matrix(c!(1,2,3,4), 2, 2, Row); +/// assert_eq!(a, b); +/// } +/// ``` +pub fn par_ml_matrix(s: &str) -> Matrix where { + let str_rows: Vec<&str> = s.split(';').collect(); + let r = str_rows.len(); + let str_data = str_rows + .into_iter() + .map(|x| x.trim().split(' ').collect::>()) + .collect::>>(); + let c = str_data[0].len(); let data = str_data .into_par_iter() @@ -815,7 +894,8 @@ pub fn ml_matrix(s: &str) -> Matrix where { .collect::>() }) .collect::>(); - matrix(data, r, c, Row) + + par_matrix(data, r, c, Row) } /// Pretty Print @@ -835,6 +915,8 @@ impl PartialEq for Matrix { // .zip(other.data.clone()) // .all(|(x, y)| nearly_eq(x, y)) // && self.row == other.row + + // Note: the sequential version in this case can be replaced by the parallel version? self.data .clone() .into_par_iter() @@ -908,37 +990,65 @@ impl Matrix { let c = self.col; assert_eq!(r * c, self.data.len()); let l = r * c - 1; - // let mut data: Vec = self.data.clone(); + let mut data: Vec = self.data.clone(); + let ref_data = &self.data; + + match self.shape { + Row => { + for i in 0..l { + let s = (i * c) % l; + data[i] = ref_data[s]; + } + data[l] = ref_data[l]; + matrix(data, r, c, Col) + } + Col => { + for i in 0..l { + let s = (i * r) % l; + data[i] = ref_data[s]; + } + data[l] = ref_data[l]; + matrix(data, r, c, Row) + } + } + } + + /// Change Bindings in Parallel + /// + /// `Row` -> `Col` or `Col` -> `Row` + /// + /// # Examples + /// ``` + /// use peroxide::fuga::*; + /// + /// let a = matrix(vec![1,2,3,4],2,2,Row); + /// assert_eq!(a.shape, Row); + /// let b = a.par_change_shape(); + /// assert_eq!(b.shape, Col); + /// ``` + pub fn par_change_shape(&self) -> Self { + let r = self.row; + let c = self.col; + assert_eq!(r * c, self.data.len()); + let l = r * c - 1; let ref_data = &self.data; match self.shape { Row => { - // for i in 0..l { - // let s = (i * c) % l; - // data[i] = ref_data[s]; - // } - // data[l] = ref_data[l]; - // matrix(data, r, c, Col) let mut data = (0..=l) .into_par_iter() .map(|i| ref_data[(i * c) % l]) .collect::>(); data[l] = ref_data[l]; - matrix(data, r, c, Col) + par_matrix(data, r, c, Col) } Col => { - // for i in 0..l { - // let s = (i * r) % l; - // data[i] = ref_data[s]; - // } - // data[l] = ref_data[l]; - // matrix(data, r, c, Row) let mut data = (0..=l) .into_par_iter() .map(|i| ref_data[(i * r) % l]) .collect::>(); data[l] = ref_data[l]; - matrix(data, r, c, Row) + par_matrix(data, r, c, Row) } } } @@ -966,13 +1076,47 @@ impl Matrix { match self.shape { Row => { - // for i in 0..l { - // let s = (i * c) % l; - // self.data[i] = ref_data[s]; - // } - // self.data[l] = ref_data[l]; - // self.shape = Col; + for i in 0..l { + let s = (i * c) % l; + self.data[i] = ref_data[s]; + } + self.data[l] = ref_data[l]; + self.shape = Col; + } + Col => { + for i in 0..l { + let s = (i * r) % l; + self.data[i] = ref_data[s]; + } + self.data[l] = ref_data[l]; + self.shape = Row; + } + } + } + + /// Change Bindings Mutably in Parallel + /// + /// `Row` -> `Col` or `Col` -> `Row` + /// + /// # Examples + /// ``` + /// use peroxide::fuga::*; + /// + /// let mut a = matrix(vec![1,2,3,4,5,6,7,8,9],3,3,Row); + /// assert_eq!(a.shape, Row); + /// a.par_change_shape_mut(); + /// assert_eq!(a, matrix(vec![1,4,7,2,5,8,3,6,9],3,3,Col)); + /// assert_eq!(a.shape, Col); + /// ``` + pub fn par_change_shape_mut(&mut self) { + let r = self.row; + let c = self.col; + assert_eq!(r * c, self.data.len()); + let l = r * c - 1; + let ref_data = self.data.clone(); + match self.shape { + Row => { self.data = (0..=l) .into_par_iter() .map(|i| ref_data[(i * c) % l]) @@ -981,13 +1125,6 @@ impl Matrix { self.shape = Col; } Col => { - // for i in 0..l { - // let s = (i * r) % l; - // self.data[i] = ref_data[s]; - // } - // self.data[l] = ref_data[l]; - // self.shape = Row; - self.data = (0..=l) .into_par_iter() .map(|i| ref_data[(i * r) % l]) @@ -1100,10 +1237,29 @@ impl Matrix { /// ``` pub fn col(&self, index: usize) -> Vec { assert!(index < self.col); - // let mut container: Vec = vec![0f64; self.row]; - // for i in 0..self.row { - // container[i] = self[(i, index)]; - // } + let mut container: Vec = vec![0f64; self.row]; + for i in 0..self.row { + container[i] = self[(i, index)]; + } + container + } + + /// Extract Column in parallel + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// + /// fn main() { + /// let a = matrix(c!(1,2,3,4), 2, 2, Row); + /// assert_eq!(a.pub_col(0), c!(1,3)); + /// } + /// ``` + pub fn pub_col(&self, index: usize) -> Vec { + assert!(index < self.col); + let container = (0..self.row) .into_par_iter() .map(|i| self[(i, index)]) @@ -1126,11 +1282,28 @@ impl Matrix { /// ``` pub fn row(&self, index: usize) -> Vec { assert!(index < self.row); - // let mut container: Vec = vec![0f64; self.col]; - // for i in 0..self.col { - // container[i] = self[(index, i)]; - // } - // container + let mut container: Vec = vec![0f64; self.col]; + for i in 0..self.col { + container[i] = self[(index, i)]; + } + container + } + + /// Extract Row in Parallel + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// + /// fn main() { + /// let a = matrix(c!(1,2,3,4), 2, 2, Row); + /// assert_eq!(a.pub_row(0), c!(1,2)); + /// } + /// ``` + pub fn pub_row(&self, index: usize) -> Vec { + assert!(index < self.row); let container = (0..self.col) .into_par_iter() @@ -1154,11 +1327,28 @@ impl Matrix { /// ``` pub fn diag(&self) -> Vec { assert_eq!(self.row, self.col); - // let mut container = vec![0f64; self.row]; - // for i in 0..self.row { - // container[i] = self.data[i * (self.col + 1)]; - // } - // container + let mut container = vec![0f64; self.row]; + for i in 0..self.row { + container[i] = self.data[i * (self.col + 1)]; + } + container + } + + /// Extract diagonal components in parallel + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// + /// fn main() { + /// let a = matrix!(1;4;1, 2, 2, Row); + /// assert_eq!(a.par_diag(), c!(1,4)); + /// } + /// ``` + pub fn par_diag(&self) -> Vec { + assert_eq!(self.row, self.col); let container = (0..self.row) .into_par_iter() @@ -1397,20 +1587,30 @@ impl Matrix { /// From index operations pub fn from_index(f: F, size: (usize, usize)) -> Matrix where - F: Fn(usize, usize) -> G + Copy + Send + Sync, + F: Fn(usize, usize) -> G + Copy, G: Into, { let row = size.0; let col = size.1; - // let mut mat = matrix(vec![0f64; row * col], row, col, Row); + let mut mat = matrix(vec![0f64; row * col], row, col, Row); - // for i in 0..row { - // for j in 0..col { - // mat[(i, j)] = f(i, j).into(); - // } - // } - // mat + for i in 0..row { + for j in 0..col { + mat[(i, j)] = f(i, j).into(); + } + } + mat + } + + /// From index operations + pub fn par_from_index(f: F, size: (usize, usize)) -> Matrix + where + F: Fn(usize, usize) -> G + Copy + Send + Sync, + G: Into, + { + let row = size.0; + let col = size.1; let data = (0..row) .into_par_iter() @@ -1421,19 +1621,24 @@ impl Matrix { .collect::>() }) .collect::>(); - matrix(data, row, col, Row) + par_matrix(data, row, col, Row) } /// Matrix to `Vec>` /// /// To send `Matrix` to `inline-python` pub fn to_vec(&self) -> Vec> { - // let mut result = vec![vec![0f64; self.col]; self.row]; - // for i in 0..self.row { - // result[i] = self.row(i); - // } - // result + let mut result = vec![vec![0f64; self.col]; self.row]; + for i in 0..self.row { + result[i] = self.row(i); + } + result + } + /// Matrix to `Vec>` in parallel + /// + /// To send `Matrix` to `inline-python` + pub fn par_to_vec(&self) -> Vec> { (0..self.row) .into_par_iter() .map(|i| self.row(i)) @@ -1454,12 +1659,28 @@ impl Matrix { /// } pub fn to_diag(&self) -> Matrix { assert_eq!(self.row, self.col, "Should be square matrix"); - // let mut result = matrix(vec![0f64; self.row * self.col], self.row, self.col, Row); - // let diag = self.diag(); - // for i in 0..self.row { - // result[(i, i)] = diag[i]; - // } - // result + let mut result = matrix(vec![0f64; self.row * self.col], self.row, self.col, Row); + let diag = self.diag(); + for i in 0..self.row { + result[(i, i)] = diag[i]; + } + result + } + + /// To diagonal components in parallel + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// + /// fn main() { + /// let a = matrix!(1;4;1, 2, 2, Row); + /// assert_eq!(a.par_to_diag(), matrix(c!(1,0,0,4), 2, 2, Row)); + /// } + pub fn par_to_diag(&self) -> Matrix { + assert_eq!(self.row, self.col, "Should be square matrix"); let data = (0..self.row) .into_par_iter() @@ -1470,7 +1691,8 @@ impl Matrix { .collect::>() }) .collect::>(); - matrix(data, self.row, self.col, Row) + + par_matrix(data, self.row, self.col, Row) } /// Submatrix @@ -1503,13 +1725,45 @@ impl Matrix { pub fn submat(&self, start: (usize, usize), end: (usize, usize)) -> Matrix { let row = end.0 - start.0 + 1; let col = end.1 - start.1 + 1; - // let mut result = matrix(vec![0f64; row * col], row, col, self.shape); - // for i in 0..row { - // for j in 0..col { - // result[(i, j)] = self[(start.0 + i, start.1 + j)]; - // } - // } - // result + let mut result = matrix(vec![0f64; row * col], row, col, self.shape); + for i in 0..row { + for j in 0..col { + result[(i, j)] = self[(start.0 + i, start.1 + j)]; + } + } + result + } + + /// Submatrix in parallel + /// + /// # Description + /// Return below elements of matrix to new matrix + /// + /// $$ + /// \begin{pmatrix} + /// \\ddots & & & & \\\\ + /// & start & \\cdots & end.1 & \\\\ + /// & \\vdots & \\ddots & \\vdots & \\\\ + /// & end.0 & \\cdots & end & \\\\ + /// & & & & \\ddots + /// \end{pmatrix} + /// $$ + /// + /// # Examples + /// ``` + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// + /// fn main() { + /// let a = ml_matrix("1 2 3;4 5 6;7 8 9"); + /// let b = ml_matrix("5 6;8 9"); + /// let c = a.par_submat((1, 1), (2, 2)); + /// assert_eq!(b, c); + /// } + /// ``` + pub fn par_submat(&self, start: (usize, usize), end: (usize, usize)) -> Matrix { + let row = end.0 - start.0 + 1; + let col = end.1 - start.1 + 1; let data = (0..row) .into_par_iter() @@ -1520,7 +1774,7 @@ impl Matrix { .collect::>() }) .collect::>(); - matrix(data, row, col, self.shape) + par_matrix(data, row, col, self.shape) } /// Substitute matrix to specific position @@ -1582,13 +1836,99 @@ impl Matrix { } } -// ============================================================================= -// Mathematics for Matrix -// ============================================================================= -impl Vector for Matrix { +// ============================================================================= +// Mathematics for Matrix +// ============================================================================= +impl Vector for Matrix { + type Scalar = f64; + + fn add_vec(&self, other: &Self) -> Self { + assert_eq!(self.row, other.row); + assert_eq!(self.col, other.col); + + match () { + #[cfg(feature = "O3")] + () => { + if self.shape != other.shape { + return self.add(&other.change_shape()); + } + let x = &self.data; + let mut y = other.data.clone(); + let n_i32 = x.len() as i32; + let a_f64 = 1f64; + unsafe { + daxpy(n_i32, a_f64, x, 1, &mut y, 1); + } + matrix(y, self.row, self.col, self.shape) + } + _ => { + let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); + for i in 0..self.row { + for j in 0..self.col { + result[(i, j)] += other[(i, j)]; + } + } + result + } + } + } + + fn sub_vec(&self, other: &Self) -> Self { + assert_eq!(self.row, other.row); + assert_eq!(self.col, other.col); + match () { + #[cfg(feature = "O3")] + () => { + if self.shape != other.shape { + return self.sub(&other.change_shape()); + } + let x = &other.data; + let mut y = self.data.clone(); + let n_i32 = x.len() as i32; + let a_f64 = -1f64; + unsafe { + daxpy(n_i32, a_f64, x, 1, &mut y, 1); + } + matrix(y, self.row, self.col, self.shape) + } + _ => { + let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); + for i in 0..self.row { + for j in 0..self.col { + result[(i, j)] -= other[(i, j)]; + } + } + result + } + } + } + + fn mul_scalar(&self, other: Self::Scalar) -> Self { + match () { + #[cfg(feature = "O3")] + () => { + let x = &self.data; + let mut y = vec![0f64; x.len()]; + let a_f64 = other; + let n_i32 = x.len() as i32; + + unsafe { + daxpy(n_i32, a_f64, x, 1, &mut y, 1); + } + matrix(y, self.row, self.col, self.shape) + } + _ => { + let scalar = other; + self.fmap(|x| x * scalar) + } + } + } +} + +impl ParallelVector for Matrix { type Scalar = f64; - fn add_vec(&self, other: &Self) -> Self { + fn par_add_vec(&self, other: &Self) -> Self { assert_eq!(self.row, other.row); assert_eq!(self.col, other.col); @@ -1608,14 +1948,6 @@ impl Vector for Matrix { matrix(y, self.row, self.col, self.shape) } _ => { - // let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); - // for i in 0..self.row { - // for j in 0..self.col { - // result[(i, j)] += other[(i, j)]; - // } - // } - // result - let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); result .data @@ -1633,7 +1965,7 @@ impl Vector for Matrix { } } - fn sub_vec(&self, other: &Self) -> Self { + fn par_sub_vec(&self, other: &Self) -> Self { assert_eq!(self.row, other.row); assert_eq!(self.col, other.col); match () { @@ -1652,14 +1984,6 @@ impl Vector for Matrix { matrix(y, self.row, self.col, self.shape) } _ => { - // let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); - // for i in 0..self.row { - // for j in 0..self.col { - // result[(i, j)] -= other[(i, j)]; - // } - // } - // result - let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); result .data @@ -1675,39 +1999,82 @@ impl Vector for Matrix { } } } +} - fn mul_scalar(&self, other: Self::Scalar) -> Self { - match () { - #[cfg(feature = "O3")] - () => { - let x = &self.data; - let mut y = vec![0f64; x.len()]; - let a_f64 = other; - let n_i32 = x.len() as i32; - - unsafe { - daxpy(n_i32, a_f64, x, 1, &mut y, 1); +impl Normed for Matrix { + type UnsignedScalar = f64; + fn norm(&self, kind: Norm) -> f64 { + match kind { + Norm::F => { + let mut s = 0f64; + for i in 0..self.data.len() { + s += self.data[i].powi(2); } - matrix(y, self.row, self.col, self.shape) + s.sqrt() } - _ => { - let scalar = other; - self.fmap(|x| x * scalar) + Norm::Lpq(p, q) => { + let mut s = 0f64; + for j in 0..self.col { + let mut s_row = 0f64; + for i in 0..self.row { + s_row += self[(i, j)].powi(p as i32); + } + s += s_row.powf(q / p); + } + s.powf(1f64 / q) + } + Norm::L1 => { + let mut m = f64::MIN; + match self.shape { + Row => self.change_shape().norm(Norm::L1), + Col => { + for c in 0..self.col { + let s = self.col(c).iter().sum(); + if s > m { + m = s; + } + } + m + } + } + } + Norm::LInf => { + let mut m = f64::MIN; + match self.shape { + Col => self.change_shape().norm(Norm::LInf), + Row => { + for r in 0..self.row { + let s = self.row(r).iter().sum(); + if s > m { + m = s; + } + } + m + } + } + } + Norm::L2 => { + let a = &self.t() * self; + let eig = eigen(&a, EigenMethod::Jacobi); + eig.eigenvalue.norm(Norm::LInf) } + Norm::Lp(_) => unimplemented!(), } } + fn normalize(&self, _kind: Norm) -> Self + where + Self: Sized, + { + unimplemented!() + } } -impl Normed for Matrix { +impl ParallelNormed for Matrix { type UnsignedScalar = f64; - fn norm(&self, kind: Norm) -> f64 { + + fn par_norm(&self, kind: Norm) -> f64 { match kind { Norm::F => { - // let mut s = 0f64; - // for i in 0..self.data.len() { - // s += self.data[i].powi(2); - // } - // s.sqrt() let s = self .data .clone() @@ -1717,15 +2084,6 @@ impl Normed for Matrix { s.sqrt() } Norm::Lpq(p, q) => { - // let mut s = 0f64; - // for j in 0..self.col { - // let mut s_row = 0f64; - // for i in 0..self.row { - // s_row += self[(i, j)].powi(p as i32); - // } - // s += s_row.powf(q / p); - // } - // s.powf(1f64 / q) let s = (0..self.col) .into_par_iter() .map(|j| { @@ -1744,7 +2102,6 @@ impl Normed for Matrix { Row => self.change_shape().norm(Norm::L1), Col => { for c in 0..self.col { - // let s = self.col(c).iter().sum(); let s = self.col(c).par_iter().sum(); if s > m { m = s; @@ -1760,7 +2117,6 @@ impl Normed for Matrix { Col => self.change_shape().norm(Norm::LInf), Row => { for r in 0..self.row { - // let s = self.row(r).iter().sum(); let s = self.row(r).par_iter().sum(); if s > m { m = s; @@ -1778,12 +2134,6 @@ impl Normed for Matrix { Norm::Lp(_) => unimplemented!(), } } - fn normalize(&self, _kind: Norm) -> Self - where - Self: Sized, - { - unimplemented!() - } } /// Frobenius inner product @@ -1858,15 +2208,22 @@ impl MatrixProduct for Matrix { assert_eq!(self.row, other.row); assert_eq!(self.col, other.col); - // let r = self.row; - // let c = self.col; - // let mut m = matrix(vec![0f64; r * c], r, c, self.shape); - // for i in 0..r { - // for j in 0..c { - // m[(i, j)] = self[(i, j)] * other[(i, j)] - // } - // } - // m + let r = self.row; + let c = self.col; + let mut m = matrix(vec![0f64; r * c], r, c, self.shape); + for i in 0..r { + for j in 0..c { + m[(i, j)] = self[(i, j)] * other[(i, j)] + } + } + m + } +} + +impl ParallelMatrixProduct for Matrix { + fn par_hadamard(&self, other: &Self) -> Matrix { + assert_eq!(self.row, other.row); + assert_eq!(self.col, other.col); let m = (0..self.row) .into_par_iter() @@ -1877,7 +2234,7 @@ impl MatrixProduct for Matrix { .collect::>() }) .collect::>(); - matrix(m, self.row, self.col, self.shape) + par_matrix(m, self.row, self.col, self.shape) } } @@ -1952,7 +2309,7 @@ impl Add for Matrix { // } // } // result - let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); + let mut result = par_matrix(self.data.clone(), self.row, self.col, self.shape); result .data .par_iter_mut() @@ -2245,7 +2602,7 @@ impl Sub for Matrix { // } // } // result - let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); + let mut result = par_matrix(self.data.clone(), self.row, self.col, self.shape); result .data .par_iter_mut() @@ -2783,23 +3140,16 @@ impl FPMatrix for Matrix { let new_data = self .data .clone() - // .into_iter() - .into_par_iter() + .into_iter() .take(n * self.col) .collect::>(); matrix(new_data, n, self.col, Row) } Col => { - // let mut temp_data: Vec = Vec::new(); - // for i in 0..n { - // temp_data.extend(self.row(i)); - // } - // matrix(temp_data, n, self.col, Row) - - let temp_data = (0..n) - .into_par_iter() - .flat_map(|i| self.row(i)) - .collect::>(); + let mut temp_data: Vec = Vec::new(); + for i in 0..n { + temp_data.extend(self.row(i)); + } matrix(temp_data, n, self.col, Row) } } @@ -2814,23 +3164,16 @@ impl FPMatrix for Matrix { let new_data = self .data .clone() - // .into_iter() - .into_par_iter() + .into_iter() .take(n * self.row) .collect::>(); matrix(new_data, self.row, n, Col) } Row => { - // let mut temp_data: Vec = Vec::new(); - // for i in 0..n { - // temp_data.extend(self.col(i)); - // } - // matrix(temp_data, self.row, n, Col) - - let temp_data = (0..n) - .into_par_iter() - .flat_map(|i| self.col(i)) - .collect::>(); + let mut temp_data: Vec = Vec::new(); + for i in 0..n { + temp_data.extend(self.col(i)); + } matrix(temp_data, self.row, n, Col) } } @@ -2839,43 +3182,28 @@ impl FPMatrix for Matrix { fn skip_row(&self, n: usize) -> Self { assert!(n < self.row, "Skip range is larger than row of matrix"); - // let mut temp_data: Vec = Vec::new(); - // for i in n..self.row { - // temp_data.extend(self.row(i)); - // } - // matrix(temp_data, self.row - n, self.col, Row) - - let temp_data = (n..self.row) - .into_par_iter() - .flat_map(|i| self.row(i)) - .collect::>(); + let mut temp_data: Vec = Vec::new(); + for i in n..self.row { + temp_data.extend(self.row(i)); + } matrix(temp_data, self.row - n, self.col, Row) } fn skip_col(&self, n: usize) -> Self { assert!(n < self.col, "Skip range is larger than col of matrix"); - // let mut temp_data: Vec = Vec::new(); - // for i in n..self.col { - // temp_data.extend(self.col(i)); - // } - // matrix(temp_data, self.row, self.col - n, Col) - - let temp_data = (n..self.col) - .into_par_iter() - .flat_map(|i| self.col(i)) - .collect::>(); + let mut temp_data: Vec = Vec::new(); + for i in n..self.col { + temp_data.extend(self.col(i)); + } matrix(temp_data, self.row, self.col - n, Col) } fn fmap(&self, f: F) -> Matrix where - F: Fn(f64) -> f64 + Send + Sync, + F: Fn(f64) -> f64, { - // let result = self.data.iter().map(|x| f(*x)).collect::>(); - // matrix(result, self.row, self.col, self.shape) - - let result = self.data.par_iter().map(|x| f(*x)).collect::>(); + let result = self.data.iter().map(|x| f(*x)).collect::>(); matrix(result, self.row, self.col, self.shape) } @@ -2958,12 +3286,136 @@ impl FPMatrix for Matrix { } fn reduce(&self, init: T, f: F) -> f64 + where + F: Fn(f64, f64) -> f64, + T: Into + Copy + Clone, + { + self.data.iter().fold(init.into(), |x, y| f(x, *y)) + } + + fn zip_with(&self, f: F, other: &Matrix) -> Self + where + F: Fn(f64, f64) -> f64, + { + assert_eq!(self.data.len(), other.data.len()); + let mut a = other.clone(); + if self.shape != other.shape { + a = a.change_shape(); + } + let result = self + .data + .iter() + .zip(a.data.iter()) + .map(|(x, y)| f(*x, *y)) + .collect::>(); + matrix(result, self.row, self.col, self.shape) + } + + fn col_reduce(&self, f: F) -> Vec + where + F: Fn(Vec) -> f64, + { + let mut v = vec![0f64; self.col]; + for i in 0..self.col { + v[i] = f(self.col(i)); + } + v + } + + fn row_reduce(&self, f: F) -> Vec + where + F: Fn(Vec) -> f64, + { + let mut v = vec![0f64; self.row]; + for i in 0..self.row { + v[i] = f(self.row(i)); + } + v + } +} + +impl ParallelFPMatrix for Matrix { + fn par_take_row(&self, n: usize) -> Self { + if n >= self.row { + return self.clone(); + } + match self.shape { + Row => { + let new_data = self + .data + .clone() + .into_par_iter() + .take(n * self.col) + .collect::>(); + par_matrix(new_data, n, self.col, Row) + } + Col => { + let temp_data = (0..n) + .into_par_iter() + .flat_map(|i| self.row(i)) + .collect::>(); + par_matrix(temp_data, n, self.col, Row) + } + } + } + + fn par_take_col(&self, n: usize) -> Self { + if n >= self.col { + return self.clone(); + } + match self.shape { + Col => { + let new_data = self + .data + .clone() + .into_par_iter() + .take(n * self.row) + .collect::>(); + par_matrix(new_data, self.row, n, Col) + } + Row => { + let temp_data = (0..n) + .into_par_iter() + .flat_map(|i| self.col(i)) + .collect::>(); + par_matrix(temp_data, self.row, n, Col) + } + } + } + + fn par_skip_row(&self, n: usize) -> Self { + assert!(n < self.row, "Skip range is larger than row of matrix"); + + let temp_data = (n..self.row) + .into_par_iter() + .flat_map(|i| self.row(i)) + .collect::>(); + par_matrix(temp_data, self.row - n, self.col, Row) + } + + fn par_skip_col(&self, n: usize) -> Self { + assert!(n < self.col, "Skip range is larger than col of matrix"); + + let temp_data = (n..self.col) + .into_par_iter() + .flat_map(|i| self.col(i)) + .collect::>(); + par_matrix(temp_data, self.row, self.col - n, Col) + } + + fn par_fmap(&self, f: F) -> Matrix + where + F: Fn(f64) -> f64 + Send + Sync, + { + let result = self.data.par_iter().map(|x| f(*x)).collect::>(); + par_matrix(result, self.row, self.col, self.shape) + } + + fn par_reduce(&self, init: T, f: F) -> f64 where F: Fn(f64, f64) -> f64 + Send + Sync, T: Into + Send + Sync + Copy + Clone, { - // self.data.iter().fold(init.into(), |x, y| f(x, *y)) - self.data .clone() .into_par_iter() @@ -2971,7 +3423,7 @@ impl FPMatrix for Matrix { .sum() } - fn zip_with(&self, f: F, other: &Matrix) -> Self + fn par_zip_with(&self, f: F, other: &Matrix) -> Self where F: Fn(f64, f64) -> f64 + Send + Sync, { @@ -2980,13 +3432,6 @@ impl FPMatrix for Matrix { if self.shape != other.shape { a = a.change_shape(); } - // let result = self - // .data - // .iter() - // .zip(a.data.iter()) - // .map(|(x, y)| f(*x, *y)) - // .collect::>(); - // matrix(result, self.row, self.col, self.shape) let result = self .data @@ -2994,35 +3439,23 @@ impl FPMatrix for Matrix { .zip(a.data.par_iter()) .map(|(x, y)| f(*x, *y)) .collect::>(); - matrix(result, self.row, self.col, self.shape) + par_matrix(result, self.row, self.col, self.shape) } - fn col_reduce(&self, f: F) -> Vec + fn par_col_reduce(&self, f: F) -> Vec where F: Fn(Vec) -> f64 + Send + Sync, { - // let mut v = vec![0f64; self.col]; - // for i in 0..self.col { - // v[i] = f(self.col(i)); - // } - // v - (0..self.col) .into_par_iter() .map(|i| f(self.col(i))) .collect::>() } - fn row_reduce(&self, f: F) -> Vec + fn par_row_reduce(&self, f: F) -> Vec where F: Fn(Vec) -> f64 + Send + Sync, { - // let mut v = vec![0f64; self.row]; - // for i in 0..self.row { - // v[i] = f(self.row(i)); - // } - // v - (0..self.row) .into_par_iter() .map(|i| f(self.row(i))) @@ -3055,13 +3488,15 @@ pub trait LinearAlgebra { } pub fn diag(n: usize) -> Matrix { - // let mut v: Vec = vec![0f64; n * n]; - // for i in 0..n { - // let idx = i * (n + 1); - // v[idx] = 1f64; - // } - // matrix(v, n, n, Row) + let mut v: Vec = vec![0f64; n * n]; + for i in 0..n { + let idx = i * (n + 1); + v[idx] = 1f64; + } + matrix(v, n, n, Row) +} +pub fn par_diag(n: usize) -> Matrix { let v = (0..n * n) .into_par_iter() .map(|i| { @@ -3072,7 +3507,7 @@ pub fn diag(n: usize) -> Matrix { } }) .collect::>(); - matrix(v, n, n, Row) + par_matrix(v, n, n, Row) } /// Data structure for Complete Pivoting LU decomposition diff --git a/src/structure/vector.rs b/src/structure/vector.rs index ad00d09d..aaa939d5 100644 --- a/src/structure/vector.rs +++ b/src/structure/vector.rs @@ -270,6 +270,10 @@ use blas::{daxpy, ddot, dnrm2, idamax}; use crate::rayon::prelude::*; use crate::structure::matrix::{matrix, Matrix, Row}; +use crate::traits::fp::ParallelFPVector; +use crate::traits::general::ParallelAlgorithm; +use crate::traits::math::{ParallelInnerProduct, ParallelNormed, ParallelVectorProduct}; +use crate::traits::mutable::ParallelMutFP; use crate::traits::{ fp::FPVector, general::Algorithm, @@ -277,7 +281,7 @@ use crate::traits::{ mutable::MutFP, pointer::{Oxide, Redox, RedoxCommon}, }; -// use std::cmp::min; +use std::cmp::min; impl FPVector for Vec { type Scalar = f64; @@ -297,14 +301,10 @@ impl FPVector for Vec { /// ``` fn fmap(&self, f: F) -> Vec where - F: Fn(f64) -> f64 + Sync + Send, + F: Fn(f64) -> f64, { - // let mut v = self.clone(); - // v.iter_mut().for_each(|x| *x = f(*x)); - // v - let mut v = self.clone(); - v.par_iter_mut().for_each(|x| *x = f(*x)); + v.iter_mut().for_each(|x| *x = f(*x)); v } @@ -322,12 +322,148 @@ impl FPVector for Vec { /// } /// ``` fn reduce(&self, init: T, f: F) -> f64 + where + F: Fn(f64, f64) -> f64, + T: Into + Copy, + { + self.iter().fold(init.into(), |x, &y| f(x, y)) + } + + fn zip_with(&self, f: F, other: &Vec) -> Vec + where + F: Fn(f64, f64) -> f64, + { + self.iter() + .zip(other) + .map(|(x, y)| f(*x, *y)) + .collect::>() + } + + /// Filter for `Vec` + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// + /// fn main() { + /// let a = c!(1,2,3,4,5); + /// let b = a.filter(|x| x > 3.); + /// assert_eq!(b, c!(4,5)); + /// } + /// ``` + fn filter(&self, f: F) -> Vec + where + F: Fn(f64) -> bool, + { + self.clone() + .into_iter() + .filter(|x| f(*x)) + .collect::>() + } + + /// Take for `Vec` + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// + /// fn main() { + /// let a = c!(1,2,3,4,5); + /// let b = a.take(3); + /// assert_eq!(b, c!(1,2,3)); + /// } + /// ``` + fn take(&self, n: usize) -> Vec { + let mut v = vec![0f64; n]; + v[..n].copy_from_slice(&self[..n]); + v + } + + /// Skip for `Vec` + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// + /// fn main() { + /// let a = c!(1,2,3,4,5); + /// let b = a.skip(3); + /// assert_eq!(b, c!(4,5)); + /// } + /// ``` + fn skip(&self, n: usize) -> Vec { + let l = self.len(); + let mut v = vec![0f64; l - n]; + for (i, j) in (n..l).enumerate() { + v[i] = self[j]; + } + v + } + + fn sum(&self) -> f64 { + self.iter().sum() + } + + fn prod(&self) -> f64 { + self.iter().product() + } +} + +impl ParallelFPVector for Vec { + type Scalar = f64; + + /// par_fmap for `Vec` + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// use peroxide::traits::fp::ParallelFPVector; + /// + /// fn main() { + /// let a = c!(1,2,3,4,5); + /// assert_eq!(a.par_fmap(|x| x*2f64), seq!(2,10,2)); + /// } + /// ``` + fn par_fmap(&self, f: F) -> Vec + where + F: Fn(f64) -> f64 + Sync + Send, + { + let mut v = self.clone(); + v.par_iter_mut().for_each(|x| *x = f(*x)); + v + } + + /// reduce for `Vec` + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// use peroxide::traits::fp::ParallelFPVector; + /// + /// fn main() { + /// let a = seq!(1,100,1); + /// assert_eq!(a.par_reduce(0, |x,y| x + y), 5050f64); + /// } + /// ``` + fn par_reduce(&self, init: T, f: F) -> f64 where F: Fn(f64, f64) -> f64 + Send + Sync, T: Into + Send + Sync + Copy, { self.iter().fold(init.into(), |x, &y| f(x, y)) + // Parallel version unimplemented + // self.par_iter() // .cloned() // .fold(|| init.into(), |x, y| f(x, y)) @@ -336,14 +472,10 @@ impl FPVector for Vec { // // https://docs.rs/rayon/latest/rayon/iter/trait.ParallelIterator.html#method.reduce } - fn zip_with(&self, f: F, other: &Vec) -> Vec + fn par_zip_with(&self, f: F, other: &Vec) -> Vec where F: Fn(f64, f64) -> f64 + Send + Sync, { - // self.iter() - // .zip(other) - // .map(|(x, y)| f(*x, *y)) - // .collect::>() self.par_iter() .zip(other) .map(|(x, y)| f(*x, *y)) @@ -357,45 +489,40 @@ impl FPVector for Vec { /// #[macro_use] /// extern crate peroxide; /// use peroxide::fuga::*; + /// use peroxide::traits::fp::ParallelFPVector; /// /// fn main() { /// let a = c!(1,2,3,4,5); - /// let b = a.filter(|x| x > 3.); + /// let b = a.par_filter(|x| x > 3.); /// assert_eq!(b, c!(4,5)); /// } /// ``` - fn filter(&self, f: F) -> Vec + fn par_filter(&self, f: F) -> Vec where F: Fn(f64) -> bool + Send + Sync, { - // self.clone() - // .into_iter() - // .filter(|x| f(*x)) - // .collect::>() self.clone() .into_par_iter() .filter(|x| f(*x)) .collect::>() } - /// Take for `Vec` + /// Take for `Vec` in Parallel /// /// # Examples /// ``` /// #[macro_use] /// extern crate peroxide; /// use peroxide::fuga::*; + /// use peroxide::traits::fp::ParallelFPVector; /// /// fn main() { /// let a = c!(1,2,3,4,5); - /// let b = a.take(3); + /// let b = a.par_take(3); /// assert_eq!(b, c!(1,2,3)); /// } /// ``` - fn take(&self, n: usize) -> Vec { - // let mut v = vec![0f64; n]; - // v[..n].copy_from_slice(&self[..n]); - // v + fn par_take(&self, n: usize) -> Vec { let mut v = vec![0_f64; n]; v[..n] .par_iter_mut() @@ -406,27 +533,22 @@ impl FPVector for Vec { v } - /// Skip for `Vec` + /// Skip for `Vec` in Parallel /// /// # Examples /// ``` /// #[macro_use] /// extern crate peroxide; /// use peroxide::fuga::*; + /// use peroxide::traits::fp::ParallelFPVector; /// /// fn main() { /// let a = c!(1,2,3,4,5); - /// let b = a.skip(3); + /// let b = a.par_skip(3); /// assert_eq!(b, c!(4,5)); /// } /// ``` - fn skip(&self, n: usize) -> Vec { - // let l = self.len(); - // let mut v = vec![0f64; l - n]; - // for (i, j) in (n..l).enumerate() { - // v[i] = self[j]; - // } - // v + fn par_skip(&self, n: usize) -> Vec { let l = self.len(); let mut v = vec![0f64; l - n]; v[..(l - n)] @@ -436,29 +558,35 @@ impl FPVector for Vec { v } - fn sum(&self) -> f64 { - // self.iter().sum() + fn par_sum(&self) -> f64 { self.par_iter().sum() } - fn prod(&self) -> f64 { - // self.iter().product() + fn par_prod(&self) -> f64 { self.par_iter().product() } } /// Explicit version of `map` pub fn map(f: F, xs: &[T]) -> Vec +where + F: Fn(T) -> T, + T: Default + Copy, +{ + let l = xs.len(); + let mut result = vec![T::default(); l]; + for i in 0..l { + result[i] = f(xs[i]); + } + result +} + +/// Explicit version of `map` in parallel +pub fn par_map(f: F, xs: &[T]) -> Vec where F: Fn(T) -> T + Send + Sync, T: Default + Copy + Send + Sync, { - // let l = xs.len(); - // let mut result = vec![T::default(); l]; - // for i in 0..l { - // result[i] = f(xs[i]); - // } - // result let mut result = vec![T::default(); xs.len()]; result.par_iter_mut().for_each(|x| *x = f(*x)); result @@ -480,16 +608,24 @@ where /// Explicit version of `zip_with` pub fn zip_with(f: F, xs: &[T], ys: &[T]) -> Vec +where + F: Fn(T, T) -> T, + T: Default + Copy, +{ + let l = min(xs.len(), ys.len()); + let mut result = vec![T::default(); l]; + for i in 0..l { + result[i] = f(xs[i], ys[i]); + } + result +} + +/// Explicit version of `zip_with` in parallel +pub fn par_zip_with(f: F, xs: &[T], ys: &[T]) -> Vec where F: Fn(T, T) -> T + Send + Sync, T: Default + Copy + Send + Sync, { - // let l = min(xs.len(), ys.len()); - // let mut result = vec![T::default(); l]; - // for i in 0..l { - // result[i] = f(xs[i], ys[i]); - // } - // result xs.par_iter() .zip(ys.into_par_iter()) .map(|(x, y)| f(*x, *y)) @@ -500,22 +636,38 @@ impl MutFP for Vec { type Scalar = f64; fn mut_map(&mut self, f: F) + where + F: Fn(Self::Scalar) -> Self::Scalar, + { + for x in self.iter_mut() { + *x = f(*x); + } + } + + fn mut_zip_with(&mut self, f: F, other: &Self) + where + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar, + { + for i in 0..self.len() { + self[i] = f(self[i], other[i]); + } + } +} + +impl ParallelMutFP for Vec { + type Scalar = f64; + + fn par_mut_map(&mut self, f: F) where F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync, { - // for x in self.iter_mut() { - // *x = f(*x); - // } self.par_iter_mut().for_each(|x| *x = f(*x)); } - fn mut_zip_with(&mut self, f: F, other: &Self) + fn par_mut_zip_with(&mut self, f: F, other: &Self) where F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync, { - // for i in 0..self.len() { - // self[i] = f(self[i], other[i]); - // } self.par_iter_mut() .zip(other.into_par_iter()) .for_each(|(x, y)| *x = f(*x, *y)); @@ -540,35 +692,15 @@ impl Algorithm for Vec { let l = self.len(); let idx = (1..(l + 1)).collect::>(); - // let mut vec_tup = self - // .clone() - // .into_iter() - // .zip(idx.clone()) - // .collect::>(); - // vec_tup.sort_by(|x, y| x.0.partial_cmp(&y.0).unwrap().reverse()); - // let indices = vec_tup.into_iter().map(|(_, y)| y).collect::>(); - // idx.into_iter() - // .map(|x| indices.clone().into_iter().position(|t| t == x).unwrap()) - // .collect::>() - let mut vec_tup = self .clone() - .into_par_iter() + .into_iter() .zip(idx.clone()) .collect::>(); vec_tup.sort_by(|x, y| x.0.partial_cmp(&y.0).unwrap().reverse()); - let indices = vec_tup - .into_par_iter() - .map(|(_, y)| y) - .collect::>(); - idx.into_par_iter() - .map(|x| { - indices - .clone() - .into_par_iter() - .position_any(|t| t == x) // Note: position() is deprecated by rayon - .unwrap() - }) + let indices = vec_tup.into_iter().map(|(_, y)| y).collect::>(); + idx.into_iter() + .map(|x| indices.clone().into_iter().position(|t| t == x).unwrap()) .collect::>() } @@ -634,17 +766,145 @@ impl Algorithm for Vec { } #[cfg(not(feature = "O3"))] { - // self.into_iter() - // .enumerate() - // .fold((0usize, f64::MIN), |acc, (ics, &val)| { - // if acc.1 < val { - // (ics, val) - // } else { - // acc - // } - // }) - // .0 + self.into_iter() + .enumerate() + .fold((0usize, f64::MIN), |acc, (ics, &val)| { + if acc.1 < val { + (ics, val) + } else { + acc + } + }) + .0 + } + } + + /// arg min + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// + /// fn main() { + /// let v = c!(1,3,2,4,3,7); + /// assert_eq!(v.arg_min(),0); + /// + /// let v2 = c!(4,3,2,5,1,6); + /// assert_eq!(v2.arg_min(),4); + /// } + fn arg_min(&self) -> usize { + self.iter() + .enumerate() + .fold((0usize, f64::MAX), |acc, (ics, &val)| { + if acc.1 > val { + (ics, val) + } else { + acc + } + }) + .0 + } + + fn max(&self) -> f64 { + #[cfg(feature = "O3")] + { + let n_i32 = self.len() as i32; + let idx: usize; + unsafe { + idx = idamax(n_i32, self, 1) - 1; + } + self[idx] + } + #[cfg(not(feature = "O3"))] + { + self.into_iter() + .fold(f64::MIN, |acc, &val| if acc < val { val } else { acc }) + } + } + + fn min(&self) -> f64 { + self.iter() + .fold(f64::MAX, |acc, &val| if acc > val { val } else { acc }) + } + fn swap_with_perm(&mut self, p: &Vec<(usize, usize)>) { + for (i, j) in p.iter() { + self.swap(*i, *j); + } + } +} + +impl ParallelAlgorithm for Vec { + /// Assign rank in parallel + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// use peroxide::traits::general::ParallelAlgorithm; + /// + /// fn main() { + /// let v = c!(7, 5, 9, 2, 8); + /// assert_eq!(v.par_rank(), vec![2,3,0,4,1]); + /// } + /// ``` + fn par_rank(&self) -> Vec { + let l = self.len(); + let idx = (1..(l + 1)).collect::>(); + + let mut vec_tup = self + .clone() + .into_par_iter() + .zip(idx.clone()) + .collect::>(); + vec_tup.sort_by(|x, y| x.0.partial_cmp(&y.0).unwrap().reverse()); + let indices = vec_tup + .into_par_iter() + .map(|(_, y)| y) + .collect::>(); + idx.into_par_iter() + .map(|x| { + indices + .clone() + .into_par_iter() + .position_any(|t| t == x) // Note: position() is deprecated by rayon + .unwrap() + }) + .collect::>() + } + + /// arg max in Parallel + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// use peroxide::traits::general::ParallelAlgorithm; + /// + /// fn main() { + /// let v = c!(1,3,2,4,3,7); + /// assert_eq!(v.par_arg_max(),5); + /// + /// let v2 = c!(1,3,2,5,6,6); + /// assert_eq!(v2.par_arg_max(),4); + /// } + /// ``` + fn par_arg_max(&self) -> usize { + #[cfg(feature = "O3")] + { + let n_i32 = self.len() as i32; + let idx: usize; + unsafe { + idx = idamax(n_i32, self, 1) - 1; + } + idx + } + #[cfg(not(feature = "O3"))] + { self.into_par_iter() .enumerate() .fold( @@ -658,7 +918,7 @@ impl Algorithm for Vec { }, ) .reduce( - // Note: need reduce and can nto simply do .max() because rayon::..max() requires Ord, which is stricter than PartialOrd + // Note: need reduce and can not simply do .max() because rayon::..max() requires Ord, which is stricter than PartialOrd // Hence combine the results from the parallel fold || (0usize, f64::MIN), // Identity element |acc1, acc2| { @@ -673,33 +933,23 @@ impl Algorithm for Vec { } } - /// arg min + /// arg min in Parallel /// /// # Examples /// ``` /// #[macro_use] /// extern crate peroxide; /// use peroxide::fuga::*; + /// use peroxide::traits::general::ParallelAlgorithm; /// /// fn main() { /// let v = c!(1,3,2,4,3,7); - /// assert_eq!(v.arg_min(),0); + /// assert_eq!(v.par_arg_min(),0); /// /// let v2 = c!(4,3,2,5,1,6); - /// assert_eq!(v2.arg_min(),4); + /// assert_eq!(v2.par_arg_min(),4); /// } - fn arg_min(&self) -> usize { - // self.iter() - // .enumerate() - // .fold((0usize, f64::MAX), |acc, (ics, &val)| { - // if acc.1 > val { - // (ics, val) - // } else { - // acc - // } - // }) - // .0 - + fn par_arg_min(&self) -> usize { self.into_par_iter() .enumerate() .fold( @@ -725,7 +975,7 @@ impl Algorithm for Vec { .0 } - fn max(&self) -> f64 { + fn par_max(&self) -> f64 { #[cfg(feature = "O3")] { let n_i32 = self.len() as i32; @@ -737,9 +987,6 @@ impl Algorithm for Vec { } #[cfg(not(feature = "O3"))] { - // self.into_iter() - // .fold(f64::MIN, |acc, &val| if acc < val { val } else { acc }) - self.into_par_iter() .fold(|| f64::MIN, |acc, &val| if acc < val { val } else { acc }) .reduce( @@ -749,10 +996,7 @@ impl Algorithm for Vec { } } - fn min(&self) -> f64 { - // self.iter() - // .fold(f64::MAX, |acc, &val| if acc > val { val } else { acc }) - + fn par_min(&self) -> f64 { self.into_par_iter() .fold(|| f64::MAX, |acc, &val| if acc > val { val } else { acc }) .reduce( @@ -760,12 +1004,6 @@ impl Algorithm for Vec { |acc1, acc2| if acc1 > acc2 { acc2 } else { acc1 }, ) } - - fn swap_with_perm(&mut self, p: &Vec<(usize, usize)>) { - for (i, j) in p.iter() { - self.swap(*i, *j); - } - } } impl Vector for Vec { @@ -802,8 +1040,49 @@ impl Normed for Vec { } #[cfg(not(feature = "O3"))] { - // self.iter().map(|x| x.powi(2)).sum::().sqrt() + self.iter().map(|x| x.powi(2)).sum::().sqrt() + } + } + Norm::Lp(p) => { + assert!( + p >= 1f64, + "lp norm is only defined for p>=1, the given value was p={}", + p + ); + self.iter().map(|x| x.powf(p)).sum::().powf(1f64 / p) + } + Norm::LInf => self.iter().fold(0f64, |x, y| x.max(y.abs())), + Norm::F => unimplemented!(), + Norm::Lpq(_, _) => unimplemented!(), + } + } + fn normalize(&self, kind: Norm) -> Self + where + Self: Sized, + { + let denom = self.norm(kind); + self.fmap(|x| x / denom) + } +} + +impl ParallelNormed for Vec { + type UnsignedScalar = f64; + fn par_norm(&self, kind: Norm) -> f64 { + match kind { + Norm::L1 => self.iter().map(|x| x.abs()).sum(), + Norm::L2 => { + #[cfg(feature = "O3")] + { + let n_i32 = self.len() as i32; + let res: f64; + unsafe { + res = dnrm2(n_i32, self, 1); + } + res + } + #[cfg(not(feature = "O3"))] + { self.par_iter() .map(|x| x.powi(2)) .sum::() @@ -816,14 +1095,11 @@ impl Normed for Vec { "lp norm is only defined for p>=1, the given value was p={}", p ); - // self.iter().map(|x| x.powf(p)).sum::().powf(1f64 / p) - self.par_iter() .map(|x| x.powf(p)) .sum::() .powf(1_f64 / p) } - // Norm::LInf => self.iter().fold(0f64, |x, y| x.max(y.abs())), Norm::LInf => self .par_iter() .fold(|| 0f64, |x, y| x.max(y.abs())) @@ -832,13 +1108,6 @@ impl Normed for Vec { Norm::Lpq(_, _) => unimplemented!(), } } - fn normalize(&self, kind: Norm) -> Self - where - Self: Sized, - { - let denom = self.norm(kind); - self.fmap(|x| x / denom) - } } impl InnerProduct for Vec { @@ -854,10 +1123,26 @@ impl InnerProduct for Vec { } #[cfg(not(feature = "O3"))] { - // self.iter() - // .zip(rhs.iter()) - // .fold(0f64, |x, (y1, y2)| x + y1 * y2) + self.iter() + .zip(rhs.iter()) + .fold(0f64, |x, (y1, y2)| x + y1 * y2) + } + } +} +impl ParallelInnerProduct for Vec { + fn par_dot(&self, rhs: &Self) -> f64 { + #[cfg(feature = "O3")] + { + let n_i32 = self.len() as i32; + let res: f64; + unsafe { + res = ddot(n_i32, self, 1, rhs, 1); + } + res + } + #[cfg(not(feature = "O3"))] + { self.par_iter() .zip(rhs.into_par_iter()) .fold(|| 0_f64, |x, (y1, y2)| x + y1 * y2) @@ -889,12 +1174,36 @@ impl VectorProduct for Vec { v } 3 => { - // let mut v = vec![0f64; 3]; - // v[0] = self[1] * other[2] - self[2] * other[1]; - // v[1] = self[2] * other[0] - self[0] * other[2]; - // v[2] = self[0] * other[1] - self[1] * other[0]; - // v + let mut v = vec![0f64; 3]; + v[0] = self[1] * other[2] - self[2] * other[1]; + v[1] = self[2] * other[0] - self[0] * other[2]; + v[2] = self[0] * other[1] - self[1] * other[0]; + v + } + _ => { + panic!("Cross product can be defined only in 2 or 3 dimension") + } + } + } + + fn outer(&self, other: &Self) -> Matrix { + let m: Matrix = self.into(); + let n = matrix(other.to_owned(), 1, other.len(), Row); + m * n + } +} +impl ParallelVectorProduct for Vec { + fn par_cross(&self, other: &Self) -> Self { + assert_eq!(self.len(), other.len()); + // 2D cross product is ill-defined + match self.len() { + 2 => { + let mut v = vec![0f64; 1]; + v[0] = self[0] * other[1] - self[1] * other[0]; + v + } + 3 => { let v = (0..3) .into_par_iter() .map(|index| { @@ -909,12 +1218,6 @@ impl VectorProduct for Vec { } } } - - fn outer(&self, other: &Self) -> Matrix { - let m: Matrix = self.into(); - let n = matrix(other.to_owned(), 1, other.len(), Row); - m * n - } } // /// Convenient Vec Operations (No Clone, No Copy) diff --git a/src/traits/fp.rs b/src/traits/fp.rs index 1106f452..fbb66767 100644 --- a/src/traits/fp.rs +++ b/src/traits/fp.rs @@ -6,17 +6,17 @@ pub trait FPVector { fn fmap(&self, f: F) -> Self where - F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync; + F: Fn(Self::Scalar) -> Self::Scalar; fn reduce(&self, init: T, f: F) -> Self::Scalar where - F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync, - T: Into + Send + Sync + Copy; + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar, + T: Into + Copy; fn zip_with(&self, f: F, other: &Self) -> Self where - F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync; + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar; fn filter(&self, f: F) -> Self where - F: Fn(Self::Scalar) -> bool + Send + Sync; + F: Fn(Self::Scalar) -> bool; fn take(&self, n: usize) -> Self; fn skip(&self, n: usize) -> Self; fn sum(&self) -> Self::Scalar; @@ -31,7 +31,7 @@ pub trait FPMatrix { fn skip_col(&self, n: usize) -> Matrix; fn fmap(&self, f: F) -> Matrix where - F: Fn(f64) -> f64 + Send + Sync; + F: Fn(f64) -> f64; fn col_map(&self, f: F) -> Matrix where F: Fn(Vec) -> Vec; @@ -45,16 +45,72 @@ pub trait FPMatrix { where F: Fn(Vec) -> Vec; fn reduce(&self, init: T, f: F) -> f64 + where + F: Fn(f64, f64) -> f64, + T: Into + Copy + Clone; + fn zip_with(&self, f: F, other: &Matrix) -> Matrix + where + F: Fn(f64, f64) -> f64; + fn col_reduce(&self, f: F) -> Vec + where + F: Fn(Vec) -> f64; + fn row_reduce(&self, f: F) -> Vec + where + F: Fn(Vec) -> f64; +} + +/// Functional Programming tools for Vector in Parallel (Uses Rayon crate) +pub trait ParallelFPVector { + type Scalar; + + fn par_fmap(&self, f: F) -> Self + where + F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync; + fn par_reduce(&self, init: T, f: F) -> Self::Scalar + where + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync, + T: Into + Send + Sync + Copy; + fn par_zip_with(&self, f: F, other: &Self) -> Self + where + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync; + fn par_filter(&self, f: F) -> Self + where + F: Fn(Self::Scalar) -> bool + Send + Sync; + + fn par_take(&self, n: usize) -> Self; + fn par_skip(&self, n: usize) -> Self; + fn par_sum(&self) -> Self::Scalar; + fn par_prod(&self) -> Self::Scalar; +} + +/// Functional Programming for Matrix in Parallel (Uses Rayon crate) +pub trait ParallelFPMatrix { + fn par_fmap(&self, f: F) -> Matrix + where + F: Fn(f64) -> f64 + Send + Sync; + fn par_reduce(&self, init: T, f: F) -> f64 where F: Fn(f64, f64) -> f64 + Send + Sync, T: Into + Copy + Clone + Send + Sync; - fn zip_with(&self, f: F, other: &Matrix) -> Matrix + fn par_zip_with(&self, f: F, other: &Matrix) -> Matrix where F: Fn(f64, f64) -> f64 + Send + Sync; - fn col_reduce(&self, f: F) -> Vec + fn par_col_reduce(&self, f: F) -> Vec where F: Fn(Vec) -> f64 + Send + Sync; - fn row_reduce(&self, f: F) -> Vec + fn par_row_reduce(&self, f: F) -> Vec where F: Fn(Vec) -> f64 + Send + Sync; + fn par_take_row(&self, n: usize) -> Matrix; + fn par_take_col(&self, n: usize) -> Matrix; + fn par_skip_row(&self, n: usize) -> Matrix; + fn par_skip_col(&self, n: usize) -> Matrix; + + // These are not in parallel: + // fn col_map(&self, f: F) -> Matrix + // where + // F: Fn(Vec) -> Vec; + // fn row_map(&self, f: F) -> Matrix + // where + // F: Fn(Vec) -> Vec; } diff --git a/src/traits/general.rs b/src/traits/general.rs index 2737f97b..7ec7fed1 100644 --- a/src/traits/general.rs +++ b/src/traits/general.rs @@ -8,3 +8,16 @@ pub trait Algorithm { fn min(&self) -> f64; fn swap_with_perm(&mut self, p: &Vec<(usize, usize)>); } + +/// Some algorithms for Vector in Parallel +pub trait ParallelAlgorithm { + fn par_rank(&self) -> Vec; + fn par_arg_max(&self) -> usize; + fn par_arg_min(&self) -> usize; + fn par_max(&self) -> f64; + fn par_min(&self) -> f64; + + // Not implemented in parallel: + // fn par_sign(&self) -> f64; + // fn par_swap_with_perm(&mut self, p: &Vec<(usize, usize)>); +} diff --git a/src/traits/math.rs b/src/traits/math.rs index 228aad9e..9531ba05 100644 --- a/src/traits/math.rs +++ b/src/traits/math.rs @@ -97,3 +97,35 @@ impl Normed for f64 { self / self.abs() } } + +// ============================================================================= +// Implementation for parallel traits +// ============================================================================= + +/// Mathematical Vector in Parallel +pub trait ParallelVector { + type Scalar; + fn par_add_vec(&self, rhs: &Self) -> Self; + fn par_sub_vec(&self, rhs: &Self) -> Self; +} + +/// Normed Vector in Parallel +pub trait ParallelNormed: Vector { + type UnsignedScalar; + fn par_norm(&self, kind: Norm) -> Self::UnsignedScalar; +} + +/// Inner product Vector in Parallel +pub trait ParallelInnerProduct: ParallelNormed { + fn par_dot(&self, rhs: &Self) -> Self::Scalar; +} + +/// Matrix Products in Parallel +pub trait ParallelMatrixProduct { + fn par_hadamard(&self, other: &Self) -> Matrix; +} + +/// Vector Products in Parallel +pub trait ParallelVectorProduct: Vector { + fn par_cross(&self, other: &Self) -> Self; +} diff --git a/src/traits/mod.rs b/src/traits/mod.rs index 92b224a6..2709f2db 100644 --- a/src/traits/mod.rs +++ b/src/traits/mod.rs @@ -1,3 +1,4 @@ +pub mod float; pub mod fp; pub mod general; pub mod math; @@ -6,4 +7,3 @@ pub mod num; pub mod pointer; pub mod stable; pub mod sugar; -pub mod float; diff --git a/src/traits/mutable.rs b/src/traits/mutable.rs index a8ef80a9..fccb56ce 100644 --- a/src/traits/mutable.rs +++ b/src/traits/mutable.rs @@ -4,10 +4,10 @@ pub trait MutFP { type Scalar; fn mut_map(&mut self, f: F) where - F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync; + F: Fn(Self::Scalar) -> Self::Scalar; fn mut_zip_with(&mut self, f: F, other: &Self) where - F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync; + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar; } pub trait MutMatrix { @@ -16,3 +16,14 @@ pub trait MutMatrix { unsafe fn swap(&mut self, idx1: usize, idx2: usize, shape: Shape); unsafe fn swap_with_perm(&mut self, p: &Vec<(usize, usize)>, shape: Shape); } + +// Mutable trait for Vector in Parallel (Uses Rayon crate) +pub trait ParallelMutFP { + type Scalar; + fn par_mut_map(&mut self, f: F) + where + F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync; + fn par_mut_zip_with(&mut self, f: F, other: &Self) + where + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync; +} diff --git a/src/traits/sugar.rs b/src/traits/sugar.rs index 7bb9b989..e1ce7f1e 100644 --- a/src/traits/sugar.rs +++ b/src/traits/sugar.rs @@ -11,9 +11,7 @@ where + Add + Sub + Mul - + Div - + Send - + Sync, + + Div, { //type Scalar; fn add_v(&self, v: &Self) -> Self { From 646d2d00879a9b5839a3b54425a16a9c8588d8f3 Mon Sep 17 00:00:00 2001 From: Soumya Date: Tue, 1 Oct 2024 00:33:31 +0200 Subject: [PATCH 09/17] Added further parallel procedures - Using Rayon to parallelize further traits for matrix and vector --- src/structure/matrix.rs | 184 ++++++++++++++++++++++++++++------------ src/traits/math.rs | 1 + 2 files changed, 130 insertions(+), 55 deletions(-) diff --git a/src/structure/matrix.rs b/src/structure/matrix.rs index af3d98e8..d671156e 100644 --- a/src/structure/matrix.rs +++ b/src/structure/matrix.rs @@ -615,7 +615,9 @@ use serde::{Deserialize, Serialize}; use crate::rayon::prelude::*; use crate::traits::fp::ParallelFPMatrix; -use crate::traits::math::{ParallelMatrixProduct, ParallelNormed, ParallelVector}; +use crate::traits::math::{ + ParallelInnerProduct, ParallelMatrixProduct, ParallelNormed, ParallelVector, +}; pub use self::Shape::{Col, Row}; use crate::numerical::eigen::{eigen, EigenMethod}; @@ -881,7 +883,7 @@ pub fn par_ml_matrix(s: &str) -> Matrix where { let str_rows: Vec<&str> = s.split(';').collect(); let r = str_rows.len(); let str_data = str_rows - .into_iter() + .into_par_iter() .map(|x| x.trim().split(' ').collect::>()) .collect::>>(); let c = str_data[0].len(); @@ -1254,10 +1256,10 @@ impl Matrix { /// /// fn main() { /// let a = matrix(c!(1,2,3,4), 2, 2, Row); - /// assert_eq!(a.pub_col(0), c!(1,3)); + /// assert_eq!(a.par_col(0), c!(1,3)); /// } /// ``` - pub fn pub_col(&self, index: usize) -> Vec { + pub fn par_col(&self, index: usize) -> Vec { assert!(index < self.col); let container = (0..self.row) @@ -1299,10 +1301,10 @@ impl Matrix { /// /// fn main() { /// let a = matrix(c!(1,2,3,4), 2, 2, Row); - /// assert_eq!(a.pub_row(0), c!(1,2)); + /// assert_eq!(a.par_row(0), c!(1,2)); /// } /// ``` - pub fn pub_row(&self, index: usize) -> Vec { + pub fn par_row(&self, index: usize) -> Vec { assert!(index < self.row); let container = (0..self.col) @@ -1374,6 +1376,23 @@ impl Matrix { } } + /// Transpose in Parallel + /// + /// # Examples + /// ``` + /// use peroxide::fuga::*; + /// + /// let a = par_matrix(vec![1,2,3,4], 2, 2, Row); + /// println!("{}", a.par_transpose()); // [[1,3],[2,4]] + /// assert_eq!(a.par_transpose(), matrix(vec![1,3,2,4], 2, 2, Row)) + /// ``` + pub fn par_transpose(&self) -> Self { + match self.shape { + Row => par_matrix(self.data.clone(), self.col, self.row, Col), + Col => par_matrix(self.data.clone(), self.col, self.row, Row), + } + } + /// R-like transpose function /// /// # Examples @@ -1391,6 +1410,23 @@ impl Matrix { self.transpose() } + /// R-like transpose function in parallel + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// + /// fn main() { + /// let a = matrix!(1;4;1, 2, 2, Row); + /// assert_eq!(a.transpose(), a.par_t()); + /// } + /// ``` + pub fn par_t(&self) -> Self { + self.par_transpose() + } + /// Write to CSV /// /// # Examples @@ -1603,7 +1639,7 @@ impl Matrix { mat } - /// From index operations + /// From index operations in parallel pub fn par_from_index(f: F, size: (usize, usize)) -> Matrix where F: Fn(usize, usize) -> G + Copy + Send + Sync, @@ -1948,7 +1984,7 @@ impl ParallelVector for Matrix { matrix(y, self.row, self.col, self.shape) } _ => { - let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); + let mut result = par_matrix(self.data.clone(), self.row, self.col, self.shape); result .data .par_iter_mut() @@ -1984,7 +2020,7 @@ impl ParallelVector for Matrix { matrix(y, self.row, self.col, self.shape) } _ => { - let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); + let mut result = par_matrix(self.data.clone(), self.row, self.col, self.shape); result .data .par_iter_mut() @@ -1999,10 +2035,32 @@ impl ParallelVector for Matrix { } } } + + fn par_mul_scalar(&self, other: Self::Scalar) -> Self { + match () { + #[cfg(feature = "O3")] + () => { + let x = &self.data; + let mut y = vec![0f64; x.len()]; + let a_f64 = other; + let n_i32 = x.len() as i32; + + unsafe { + daxpy(n_i32, a_f64, x, 1, &mut y, 1); + } + matrix(y, self.row, self.col, self.shape) + } + _ => { + let scalar = other; + self.par_fmap(|x| x * scalar) + } + } + } } impl Normed for Matrix { type UnsignedScalar = f64; + fn norm(&self, kind: Norm) -> f64 { match kind { Norm::F => { @@ -2147,6 +2205,17 @@ impl InnerProduct for Matrix { } } +/// Frobenius inner product in parallel +impl ParallelInnerProduct for Matrix { + fn par_dot(&self, rhs: &Self) -> f64 { + if self.shape == rhs.shape { + self.data.par_dot(&rhs.data) + } else { + self.data.par_dot(&rhs.change_shape().data) + } + } +} + /// TODO: Transpose /// Matrix as Linear operator for Vector @@ -2302,25 +2371,27 @@ impl Add for Matrix { matrix(y, self.row, self.col, self.shape) } _ => { - // let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); - // for i in 0..self.row { - // for j in 0..self.col { - // result[(i, j)] += other[(i, j)]; - // } - // } - // result - let mut result = par_matrix(self.data.clone(), self.row, self.col, self.shape); + let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); + for i in 0..self.row { + for j in 0..self.col { + result[(i, j)] += other[(i, j)]; + } + } result - .data - .par_iter_mut() - .enumerate() - .for_each(|(idx, value)| { - let i = idx / self.col; - let j = idx % self.col; - *value += other[(i, j)]; - }); - result + // Note: Parallel version - how to add the implementation below? + // let mut result = par_matrix(self.data.clone(), self.row, self.col, self.shape); + // result + // .data + // .par_iter_mut() + // .enumerate() + // .for_each(|(idx, value)| { + // let i = idx / self.col; + // let j = idx % self.col; + + // *value += other[(i, j)]; + // }); + // result } } } @@ -2511,11 +2582,12 @@ impl Neg for Matrix { matrix(y, self.row, self.col, self.shape) } _ => matrix( - // self.data.into_iter().map(|x: f64| -x).collect::>(), - self.data - .into_par_iter() - .map(|x: f64| -x) - .collect::>(), + self.data.into_iter().map(|x: f64| -x).collect::>(), + // Note: how to add parallel implementation below? + // self.data + // .into_par_iter() + // .map(|x: f64| -x) + // .collect::>(), self.row, self.col, self.shape, @@ -2541,16 +2613,17 @@ impl<'a> Neg for &'a Matrix { matrix(y, self.row, self.col, self.shape) } _ => matrix( - // self.data - // .clone() - // .into_iter() - // .map(|x: f64| -x) - // .collect::>(), self.data .clone() - .into_par_iter() + .into_iter() .map(|x: f64| -x) .collect::>(), + // Note: how to add parallel implementation below? + // self.data + // .clone() + // .into_par_iter() + // .map(|x: f64| -x) + // .collect::>(), self.row, self.col, self.shape, @@ -2595,25 +2668,26 @@ impl Sub for Matrix { matrix(y, self.row, self.col, self.shape) } _ => { - // let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); - // for i in 0..self.row { - // for j in 0..self.col { - // result[(i, j)] -= other[(i, j)]; - // } - // } - // result - let mut result = par_matrix(self.data.clone(), self.row, self.col, self.shape); - result - .data - .par_iter_mut() - .enumerate() - .for_each(|(idx, value)| { - let i = idx / self.col; - let j = idx % self.col; - - *value -= other[(i, j)]; - }); + let mut result = matrix(self.data.clone(), self.row, self.col, self.shape); + for i in 0..self.row { + for j in 0..self.col { + result[(i, j)] -= other[(i, j)]; + } + } result + // Note: how to add parallel implementation below? + // let mut result = par_matrix(self.data.clone(), self.row, self.col, self.shape); + // result + // .data + // .par_iter_mut() + // .enumerate() + // .for_each(|(idx, value)| { + // let i = idx / self.col; + // let j = idx % self.col; + + // *value -= other[(i, j)]; + // }); + // result } } } diff --git a/src/traits/math.rs b/src/traits/math.rs index 9531ba05..2100fd1b 100644 --- a/src/traits/math.rs +++ b/src/traits/math.rs @@ -107,6 +107,7 @@ pub trait ParallelVector { type Scalar; fn par_add_vec(&self, rhs: &Self) -> Self; fn par_sub_vec(&self, rhs: &Self) -> Self; + fn par_mul_scalar(&self, rhs: Self::Scalar) -> Self; } /// Normed Vector in Parallel From cdb8fdb3c2a1c8282cae5672877cb8c3bd568cdc Mon Sep 17 00:00:00 2001 From: Soumya Date: Sat, 5 Oct 2024 17:20:29 +0200 Subject: [PATCH 10/17] Added benchmarking - Benchmarking Rayon usage for matrix --- Cargo.toml | 8 +++- benches/parallel_rayon/matrix_benchmark.rs | 47 ++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 benches/parallel_rayon/matrix_benchmark.rs diff --git a/Cargo.toml b/Cargo.toml index 18f5455b..39c51a7a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,7 @@ keywords = ["Numeric", "Science", "Dataframe", "Plot", "LinearAlgebra"] exclude = [ "example_data/", "src/bin/", - "benches/", + # "benches/", "example/", "test_data/", "peroxide-ad2", @@ -25,6 +25,7 @@ maintenance = { status = "actively-developed" } [dev-dependencies] float-cmp = "0.9" +criterion = { version = "0.5.1", features = ["html_reports"] } [dependencies] csv = { version = "1.3", optional = true, default-features = false } @@ -68,3 +69,8 @@ plot = ["pyo3"] nc = ["netcdf"] parquet = ["arrow2"] complex = ["num-complex", "matrixmultiply/cgemm"] + +[[bench]] +path = "benches/parallel_rayon/matrix_benchmark.rs" +name = "matrix_benchmark" +harness = false diff --git a/benches/parallel_rayon/matrix_benchmark.rs b/benches/parallel_rayon/matrix_benchmark.rs new file mode 100644 index 00000000..16148e92 --- /dev/null +++ b/benches/parallel_rayon/matrix_benchmark.rs @@ -0,0 +1,47 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use peroxide::fuga::*; + +pub fn par_matrix_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: 630.92 µs + cr.bench_function("ser_matrix_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row))) + }); + + // Result: 1000x1000 matrix: 9.6995 ms + cr.bench_function("par_matrix_bench", |b| { + b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row))) + }); +} + +pub fn par_py_matrix_benchmark(cr: &mut Criterion) { + let v: Vec> = (0..1000) + .into_iter() + .map(|i: i32| { + (0..1000) + .into_iter() + .map(|j| 2.0 * (i as f64) * (j as f64)) + .collect::>() + }) + .collect::>>(); + + // Result: 1000x1000 matrix: 3.9858 ms + cr.bench_function("ser_py_matrix_bench", |b| { + b.iter(|| black_box(py_matrix(v.clone()))) + }); + + // Result: 1000x1000 matrix: 24.654 ms + cr.bench_function("par_py_matrix_bench", |b| { + b.iter(|| black_box(par_py_matrix(v.clone()))) + }); +} + +criterion_group!( + benches, + /*par_matrix_benchmark,*/ par_py_matrix_benchmark +); +criterion_main!(benches); From 8a536be55bc64aefa9515d25b4c0d7b1df2c8a5a Mon Sep 17 00:00:00 2001 From: Soumya Date: Sat, 5 Oct 2024 17:53:32 +0200 Subject: [PATCH 11/17] Adding benchmarks for matrix - Adding benchmark for rayon usage in matrix --- benches/parallel_rayon/matrix_benchmark.rs | 107 ++++++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/benches/parallel_rayon/matrix_benchmark.rs b/benches/parallel_rayon/matrix_benchmark.rs index 16148e92..c859bf1a 100644 --- a/benches/parallel_rayon/matrix_benchmark.rs +++ b/benches/parallel_rayon/matrix_benchmark.rs @@ -40,8 +40,113 @@ pub fn par_py_matrix_benchmark(cr: &mut Criterion) { }); } +pub fn par_matrix_change_shape_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: 7.6453 ms + cr.bench_function("ser_matrix_change_shape_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).change_shape())) + }); + + // Result: 1000x1000 matrix: 9.8586 ms + cr.bench_function("par_matrix_change_shape_bench", |b| { + b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_change_shape())) + }); +} + +pub fn par_matrix_extract_row_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: 571.90 µs + cr.bench_function("ser_matrix_extract_row_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).row(100))) + }); + + // Result: 1000x1000 matrix: 10.440 ms + cr.bench_function("par_matrix_extract_row_bench", |b| { + b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_row(100))) + }); +} + +pub fn par_matrix_from_index_benchmark(cr: &mut Criterion) { + let f = |x: usize, y: usize| 2.0 * (x as f64) * (y as f64); + let size: (usize, usize) = (1000, 1000); + + // Result: 1000x1000 matrix: 2.3662 ms + cr.bench_function("ser_matrix_from_index_bench", |b| { + b.iter(|| black_box(Matrix::from_index(f, size))) + }); + + // Result: 1000x1000 matrix: 2.3355 ms + cr.bench_function("par_matrix_from_index_bench", |b| { + b.iter(|| black_box(Matrix::from_index(f, size))) + }); +} + +pub fn par_matrix_to_vec_and_diag_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: 5.0333 ms + cr.bench_function("ser_matrix_to_vec_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).to_vec())) + }); + + // Result: 1000x1000 matrix: 10.625 ms + cr.bench_function("par_matrix_to_vec_bench", |b| { + b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_to_vec())) + }); + + // Result: 1000x1000 matrix: 3.0501 ms + cr.bench_function("ser_matrix_to_diag_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).to_diag())) + }); + + // Result: 1000x1000 matrix: 16.362 ms + cr.bench_function("par_matrix_to_diag_bench", |b| { + b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_to_diag())) + }); +} + +pub fn par_matrix_submat_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: 1.5658 ms + cr.bench_function("ser_matrix_submat_bench", |b| { + b.iter(|| { + black_box(matrix(v.clone(), 1000, 1000, Shape::Row).submat((100, 100), (900, 900))) + }) + }); + + // Result: 1000x1000 matrix: 13.000 ms + cr.bench_function("par_matrix_submat_bench", |b| { + b.iter(|| { + black_box( + par_matrix(v.clone(), 1000, 1000, Shape::Row).par_submat((100, 100), (900, 900)), + ) + }) + }); +} + criterion_group!( benches, - /*par_matrix_benchmark,*/ par_py_matrix_benchmark + /*par_matrix_benchmark,*/ + /*par_py_matrix_benchmark,*/ + /*par_matrix_change_shape_benchmark,*/ + /*par_matrix_extract_row_benchmark,*/ + /*par_matrix_from_index_benchmark,*/ + /*par_matrix_to_vec_and_diag_benchmark,*/ + par_matrix_submat_benchmark, ); criterion_main!(benches); From c104652c67f1537a9090a425926039afa7765b53 Mon Sep 17 00:00:00 2001 From: Soumya Date: Sat, 5 Oct 2024 18:41:47 +0200 Subject: [PATCH 12/17] Added further benchmarks for matrix - Added benchmark for Rayon usage in matrix - Saved results in benches/data --- .../data/rayon_matrix_benchmark_results.md | 194 ++++++++++++++ benches/parallel_rayon/matrix_benchmark.rs | 249 +++++++++++++++++- 2 files changed, 436 insertions(+), 7 deletions(-) create mode 100644 benches/data/rayon_matrix_benchmark_results.md diff --git a/benches/data/rayon_matrix_benchmark_results.md b/benches/data/rayon_matrix_benchmark_results.md new file mode 100644 index 00000000..f85feae8 --- /dev/null +++ b/benches/data/rayon_matrix_benchmark_results.md @@ -0,0 +1,194 @@ +Lib used for benchmarking: Criterion +Matrix size: 1000x1000 + +Running benches/parallel_rayon/matrix_benchmark.rs + +ser_matrix_bench time: [535.12 µs 544.51 µs 556.68 µs] +Found 11 outliers among 100 measurements (11.00%) + 4 (4.00%) high mild + 7 (7.00%) high severe + +par_matrix_bench time: [5.0912 ms 5.1431 ms 5.1995 ms] +Found 7 outliers among 100 measurements (7.00%) + 1 (1.00%) low mild + 5 (5.00%) high mild + 1 (1.00%) high severe + +ser_py_matrix_bench time: [4.3100 ms 4.3309 ms 4.3544 ms] +Found 7 outliers among 100 measurements (7.00%) + 2 (2.00%) high mild + 5 (5.00%) high severe + +par_py_matrix_bench time: [11.667 ms 11.789 ms 11.920 ms] +Found 10 outliers among 100 measurements (10.00%) + 6 (6.00%) high mild + 4 (4.00%) high severe + +ser_matrix_change_shape_bench + time: [7.3630 ms 7.4075 ms 7.4608 ms] +Found 5 outliers among 100 measurements (5.00%) + 1 (1.00%) high mild + 4 (4.00%) high severe + +par_matrix_change_shape_bench + time: [10.276 ms 10.385 ms 10.499 ms] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +ser_matrix_extract_row_bench + time: [613.39 µs 622.44 µs 633.72 µs] +Found 7 outliers among 100 measurements (7.00%) + 7 (7.00%) high severe + +par_matrix_extract_row_bench + time: [5.4321 ms 5.4851 ms 5.5399 ms] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +ser_matrix_from_index_bench + time: [2.4174 ms 2.4490 ms 2.4851 ms] +Found 14 outliers among 100 measurements (14.00%) + 1 (1.00%) high mild + 13 (13.00%) high severe + +par_matrix_from_index_bench + time: [2.3912 ms 2.4090 ms 2.4304 ms] +Found 9 outliers among 100 measurements (9.00%) + 2 (2.00%) high mild + 7 (7.00%) high severe + +ser_matrix_to_vec_bench time: [2.4800 ms 2.5082 ms 2.5423 ms] +Found 10 outliers among 100 measurements (10.00%) + 4 (4.00%) high mild + 6 (6.00%) high severe + +par_matrix_to_vec_bench time: [6.4041 ms 6.4618 ms 6.5250 ms] +Found 6 outliers among 100 measurements (6.00%) + 5 (5.00%) high mild + 1 (1.00%) high severe + +ser_matrix_to_diag_bench + time: [2.4335 ms 2.4526 ms 2.4750 ms] +Found 14 outliers among 100 measurements (14.00%) + 6 (6.00%) high mild + 8 (8.00%) high severe + +par_matrix_to_diag_bench + time: [13.514 ms 13.684 ms 13.868 ms] +Found 10 outliers among 100 measurements (10.00%) + 7 (7.00%) high mild + 3 (3.00%) high severe + +Benchmarking ser_matrix_submat_bench: Warming up for 3.0000 s +Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 8.3s, enable flat sampling, or reduce sample count to 50. +ser_matrix_submat_bench time: [1.6077 ms 1.6243 ms 1.6451 ms] +Found 16 outliers among 100 measurements (16.00%) + 3 (3.00%) high mild + 13 (13.00%) high severe + +par_matrix_submat_bench time: [10.611 ms 10.761 ms 10.942 ms] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe + +ser_matrix_add_vec_bench + time: [7.3077 ms 7.3485 ms 7.3946 ms] +Found 12 outliers among 100 measurements (12.00%) + 2 (2.00%) high mild + 10 (10.00%) high severe + +par_matrix_add_vec_bench + time: [11.331 ms 11.480 ms 11.636 ms] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +ser_matrix_norm_bench time: [5.1600 ms 5.1864 ms 5.2165 ms] +Found 7 outliers among 100 measurements (7.00%) + 1 (1.00%) high mild + 6 (6.00%) high severe + +par_matrix_norm_bench time: [2.6565 ms 2.6810 ms 2.7091 ms] +Found 5 outliers among 100 measurements (5.00%) + 2 (2.00%) high mild + 3 (3.00%) high severe + +Benchmarking ser_matrix_norm_bench #2: Warming up for 3.0000 s +Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 8.9s, enable flat sampling, or reduce sample count to 50. +ser_matrix_norm_bench #2 + time: [1.7262 ms 1.7391 ms 1.7541 ms] +Found 15 outliers among 100 measurements (15.00%) + 10 (10.00%) high mild + 5 (5.00%) high severe + +par_matrix_norm_bench #2 + time: [6.7071 ms 6.7883 ms 6.8703 ms] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +ser_matrix_norm_bench #3 + time: [9.7582 ms 9.9006 ms 10.057 ms] +Found 12 outliers among 100 measurements (12.00%) + 5 (5.00%) high mild + 7 (7.00%) high severe + +par_matrix_norm_bench #3 + time: [9.3004 ms 9.4088 ms 9.5239 ms] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +ser_matrix_inner_prod_bench + time: [5.2730 ms 5.3590 ms 5.4583 ms] +Found 14 outliers among 100 measurements (14.00%) + 3 (3.00%) high mild + 11 (11.00%) high severe + +par_matrix_inner_prod_bench + time: [5.0987 ms 5.1644 ms 5.2402 ms] +Found 7 outliers among 100 measurements (7.00%) + 3 (3.00%) high mild + 4 (4.00%) high severe + +ser_matrix_hadamard_bench + time: [5.6521 ms 5.6870 ms 5.7262 ms] +Found 12 outliers among 100 measurements (12.00%) + 3 (3.00%) high mild + 9 (9.00%) high severe + +par_matrix_hadamard_bench + time: [14.155 ms 14.335 ms 14.527 ms] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe + +ser_matrix_take_row_bench + time: [3.7894 ms 3.8234 ms 3.8613 ms] +Found 15 outliers among 100 measurements (15.00%) + 7 (7.00%) high mild + 8 (8.00%) high severe + +par_matrix_take_row_bench + time: [8.4008 ms 8.5171 ms 8.6523 ms] +Found 9 outliers among 100 measurements (9.00%) + 6 (6.00%) high mild + 3 (3.00%) high severe + +ser_matrix_fpmap_bench time: [3.2526 ms 3.2739 ms 3.2977 ms] +Found 12 outliers among 100 measurements (12.00%) + 2 (2.00%) high mild + 10 (10.00%) high severe + +par_matrix_fpmap_bench time: [10.604 ms 10.765 ms 10.937 ms] +Found 11 outliers among 100 measurements (11.00%) + 8 (8.00%) high mild + 3 (3.00%) high severe + +ser_matrix_reduce_bench time: [2.6748 ms 2.6964 ms 2.7201 ms] +Found 9 outliers among 100 measurements (9.00%) + 6 (6.00%) high mild + 3 (3.00%) high severe + +par_matrix_reduce_bench time: [6.2453 ms 6.3198 ms 6.4034 ms] +Found 6 outliers among 100 measurements (6.00%) + 4 (4.00%) high mild + 2 (2.00%) high severe diff --git a/benches/parallel_rayon/matrix_benchmark.rs b/benches/parallel_rayon/matrix_benchmark.rs index c859bf1a..a62cd539 100644 --- a/benches/parallel_rayon/matrix_benchmark.rs +++ b/benches/parallel_rayon/matrix_benchmark.rs @@ -1,5 +1,11 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use peroxide::fuga::*; +use peroxide::{ + fuga::*, + traits::{ + fp::ParallelFPMatrix, + math::{ParallelInnerProduct, ParallelMatrixProduct, ParallelNormed, ParallelVector}, + }, +}; pub fn par_matrix_benchmark(cr: &mut Criterion) { let v: Vec = (0..1000000) @@ -139,14 +145,243 @@ pub fn par_matrix_submat_benchmark(cr: &mut Criterion) { }); } +pub fn par_matrix_add_vec_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + let w: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 3.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: 7.1774 ms + cr.bench_function("ser_matrix_add_vec_bench", |b| { + b.iter(|| { + black_box(matrix(v.clone(), 1000, 1000, Shape::Row).add_vec(&matrix( + w.clone(), + 1000, + 1000, + Shape::Row, + ))) + }) + }); + + // Result: 1000x1000 matrix: 11.565 ms + cr.bench_function("par_matrix_add_vec_bench", |b| { + b.iter(|| { + black_box( + matrix(v.clone(), 1000, 1000, Shape::Row).par_add_vec(&matrix( + w.clone(), + 1000, + 1000, + Shape::Row, + )), + ) + }) + }); +} + +// Check: better parallel results (ran test 6 times) +pub fn par_matrix_norm_lpq_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: [5.5969 ms 5.7555 ms 5.9515 ms 6.0843 ms 6.3072 ms 6.5636 ms] + cr.bench_function("ser_matrix_norm_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).norm(Norm::Lpq(4.0, 2.0)))) + }); + + // Result: 1000x1000 matrix: [3.1796 ms 3.2714 ms 3.3714 ms 3.6123 ms 3.7398 ms 3.8761 ms] + cr.bench_function("par_matrix_norm_bench", |b| { + b.iter(|| { + black_box(matrix(v.clone(), 1000, 1000, Shape::Row).par_norm(Norm::Lpq(4.0, 2.0))) + }) + }); +} + +pub fn par_matrix_norm_f_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: 1.7414 ms + cr.bench_function("ser_matrix_norm_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).norm(Norm::F))) + }); + + // Result: 1000x1000 matrix: 9.0499 ms + cr.bench_function("par_matrix_norm_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).par_norm(Norm::F))) + }); +} + +pub fn par_matrix_norm_l1_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: 9.0287 ms + cr.bench_function("ser_matrix_norm_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).norm(Norm::L1))) + }); + + // Result: 1000x1000 matrix: 10.393 ms + cr.bench_function("par_matrix_norm_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).par_norm(Norm::L1))) + }); +} + +// Check: better parallel results (ran test 6 times) +pub fn par_matrix_inner_prod_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + let w: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 3.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: [5.1075 ms 5.1505 ms 5.2013 ms 5.7617 ms 6.0196 ms 6.3009 ms] + cr.bench_function("ser_matrix_inner_prod_bench", |b| { + b.iter(|| { + black_box(matrix(v.clone(), 1000, 1000, Shape::Row).dot(&matrix( + w.clone(), + 1000, + 1000, + Shape::Row, + ))) + }) + }); + + // Result: 1000x1000 matrix: [4.9931 ms 5.0244 ms 5.0642 ms 5.0322 ms 5.0819 ms 5.1404 ms] + cr.bench_function("par_matrix_inner_prod_bench", |b| { + b.iter(|| { + black_box(matrix(v.clone(), 1000, 1000, Shape::Row).par_dot(&matrix( + w.clone(), + 1000, + 1000, + Shape::Row, + ))) + }) + }); +} + +pub fn par_matrix_hadamard_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + let w: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 3.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: 5.8664 ms + cr.bench_function("ser_matrix_hadamard_bench", |b| { + b.iter(|| { + black_box(matrix(v.clone(), 1000, 1000, Shape::Row).hadamard(&matrix( + w.clone(), + 1000, + 1000, + Shape::Row, + ))) + }) + }); + + // Result: 1000x1000 matrix: 13.982 ms + cr.bench_function("par_matrix_hadamard_bench", |b| { + b.iter(|| { + black_box( + matrix(v.clone(), 1000, 1000, Shape::Row).par_hadamard(&matrix( + w.clone(), + 1000, + 1000, + Shape::Row, + )), + ) + }) + }); +} + +pub fn par_matrix_take_row_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: 3.7958 ms + cr.bench_function("ser_matrix_take_row_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).take_row(500))) + }); + + // Result: 1000x1000 matrix: 9.7897 ms + cr.bench_function("par_matrix_take_row_bench", |b| { + b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_take_row(500))) + }); +} + +pub fn par_matrix_fpmap_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + let f = |x: f64| 2.0 * x; + + // Result: 1000x1000 matrix: 3.3385 ms + cr.bench_function("ser_matrix_fpmap_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).fmap(f))) + }); + + // Result: 1000x1000 matrix: 9.6310 ms + cr.bench_function("par_matrix_fpmap_bench", |b| { + b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_fmap(f))) + }); +} + +pub fn par_matrix_reduce_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + let f = |x: f64, y: f64| 2.0 * x * y; + + // Result: 1000x1000 matrix: 2.8077 ms + cr.bench_function("ser_matrix_reduce_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).reduce(0.0, f))) + }); + + // Result: 1000x1000 matrix: 9.4877 ms + cr.bench_function("par_matrix_reduce_bench", |b| { + b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_reduce(0.0, f))) + }); +} + criterion_group!( benches, - /*par_matrix_benchmark,*/ - /*par_py_matrix_benchmark,*/ - /*par_matrix_change_shape_benchmark,*/ - /*par_matrix_extract_row_benchmark,*/ - /*par_matrix_from_index_benchmark,*/ - /*par_matrix_to_vec_and_diag_benchmark,*/ + par_matrix_benchmark, + par_py_matrix_benchmark, + par_matrix_change_shape_benchmark, + par_matrix_extract_row_benchmark, + par_matrix_from_index_benchmark, + par_matrix_to_vec_and_diag_benchmark, par_matrix_submat_benchmark, + par_matrix_add_vec_benchmark, + par_matrix_norm_lpq_benchmark, + par_matrix_norm_f_benchmark, + par_matrix_norm_l1_benchmark, + par_matrix_inner_prod_benchmark, + par_matrix_hadamard_benchmark, + par_matrix_take_row_benchmark, + par_matrix_fpmap_benchmark, + par_matrix_reduce_benchmark, ); criterion_main!(benches); From af76bfcf24a46bed477cfdb5145245927f83041b Mon Sep 17 00:00:00 2001 From: Soumya Date: Mon, 7 Oct 2024 22:41:12 +0200 Subject: [PATCH 13/17] Removed parallelized code based on benchmarks --- benches/parallel_rayon/matrix_benchmark.rs | 268 +------- src/structure/matrix.rs | 713 +-------------------- 2 files changed, 28 insertions(+), 953 deletions(-) diff --git a/benches/parallel_rayon/matrix_benchmark.rs b/benches/parallel_rayon/matrix_benchmark.rs index a62cd539..eaaa1e13 100644 --- a/benches/parallel_rayon/matrix_benchmark.rs +++ b/benches/parallel_rayon/matrix_benchmark.rs @@ -1,10 +1,7 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use peroxide::{ fuga::*, - traits::{ - fp::ParallelFPMatrix, - math::{ParallelInnerProduct, ParallelMatrixProduct, ParallelNormed, ParallelVector}, - }, + traits::math::{ParallelInnerProduct, ParallelNormed}, }; pub fn par_matrix_benchmark(cr: &mut Criterion) { @@ -24,62 +21,6 @@ pub fn par_matrix_benchmark(cr: &mut Criterion) { }); } -pub fn par_py_matrix_benchmark(cr: &mut Criterion) { - let v: Vec> = (0..1000) - .into_iter() - .map(|i: i32| { - (0..1000) - .into_iter() - .map(|j| 2.0 * (i as f64) * (j as f64)) - .collect::>() - }) - .collect::>>(); - - // Result: 1000x1000 matrix: 3.9858 ms - cr.bench_function("ser_py_matrix_bench", |b| { - b.iter(|| black_box(py_matrix(v.clone()))) - }); - - // Result: 1000x1000 matrix: 24.654 ms - cr.bench_function("par_py_matrix_bench", |b| { - b.iter(|| black_box(par_py_matrix(v.clone()))) - }); -} - -pub fn par_matrix_change_shape_benchmark(cr: &mut Criterion) { - let v: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 2.0 * (i as f64)) - .collect::>(); - - // Result: 1000x1000 matrix: 7.6453 ms - cr.bench_function("ser_matrix_change_shape_bench", |b| { - b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).change_shape())) - }); - - // Result: 1000x1000 matrix: 9.8586 ms - cr.bench_function("par_matrix_change_shape_bench", |b| { - b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_change_shape())) - }); -} - -pub fn par_matrix_extract_row_benchmark(cr: &mut Criterion) { - let v: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 2.0 * (i as f64)) - .collect::>(); - - // Result: 1000x1000 matrix: 571.90 µs - cr.bench_function("ser_matrix_extract_row_bench", |b| { - b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).row(100))) - }); - - // Result: 1000x1000 matrix: 10.440 ms - cr.bench_function("par_matrix_extract_row_bench", |b| { - b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_row(100))) - }); -} - pub fn par_matrix_from_index_benchmark(cr: &mut Criterion) { let f = |x: usize, y: usize| 2.0 * (x as f64) * (y as f64); let size: (usize, usize) = (1000, 1000); @@ -95,94 +36,6 @@ pub fn par_matrix_from_index_benchmark(cr: &mut Criterion) { }); } -pub fn par_matrix_to_vec_and_diag_benchmark(cr: &mut Criterion) { - let v: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 2.0 * (i as f64)) - .collect::>(); - - // Result: 1000x1000 matrix: 5.0333 ms - cr.bench_function("ser_matrix_to_vec_bench", |b| { - b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).to_vec())) - }); - - // Result: 1000x1000 matrix: 10.625 ms - cr.bench_function("par_matrix_to_vec_bench", |b| { - b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_to_vec())) - }); - - // Result: 1000x1000 matrix: 3.0501 ms - cr.bench_function("ser_matrix_to_diag_bench", |b| { - b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).to_diag())) - }); - - // Result: 1000x1000 matrix: 16.362 ms - cr.bench_function("par_matrix_to_diag_bench", |b| { - b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_to_diag())) - }); -} - -pub fn par_matrix_submat_benchmark(cr: &mut Criterion) { - let v: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 2.0 * (i as f64)) - .collect::>(); - - // Result: 1000x1000 matrix: 1.5658 ms - cr.bench_function("ser_matrix_submat_bench", |b| { - b.iter(|| { - black_box(matrix(v.clone(), 1000, 1000, Shape::Row).submat((100, 100), (900, 900))) - }) - }); - - // Result: 1000x1000 matrix: 13.000 ms - cr.bench_function("par_matrix_submat_bench", |b| { - b.iter(|| { - black_box( - par_matrix(v.clone(), 1000, 1000, Shape::Row).par_submat((100, 100), (900, 900)), - ) - }) - }); -} - -pub fn par_matrix_add_vec_benchmark(cr: &mut Criterion) { - let v: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 2.0 * (i as f64)) - .collect::>(); - - let w: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 3.0 * (i as f64)) - .collect::>(); - - // Result: 1000x1000 matrix: 7.1774 ms - cr.bench_function("ser_matrix_add_vec_bench", |b| { - b.iter(|| { - black_box(matrix(v.clone(), 1000, 1000, Shape::Row).add_vec(&matrix( - w.clone(), - 1000, - 1000, - Shape::Row, - ))) - }) - }); - - // Result: 1000x1000 matrix: 11.565 ms - cr.bench_function("par_matrix_add_vec_bench", |b| { - b.iter(|| { - black_box( - matrix(v.clone(), 1000, 1000, Shape::Row).par_add_vec(&matrix( - w.clone(), - 1000, - 1000, - Shape::Row, - )), - ) - }) - }); -} - // Check: better parallel results (ran test 6 times) pub fn par_matrix_norm_lpq_benchmark(cr: &mut Criterion) { let v: Vec = (0..1000000) @@ -203,23 +56,6 @@ pub fn par_matrix_norm_lpq_benchmark(cr: &mut Criterion) { }); } -pub fn par_matrix_norm_f_benchmark(cr: &mut Criterion) { - let v: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 2.0 * (i as f64)) - .collect::>(); - - // Result: 1000x1000 matrix: 1.7414 ms - cr.bench_function("ser_matrix_norm_bench", |b| { - b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).norm(Norm::F))) - }); - - // Result: 1000x1000 matrix: 9.0499 ms - cr.bench_function("par_matrix_norm_bench", |b| { - b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).par_norm(Norm::F))) - }); -} - pub fn par_matrix_norm_l1_benchmark(cr: &mut Criterion) { let v: Vec = (0..1000000) .into_iter() @@ -274,114 +110,12 @@ pub fn par_matrix_inner_prod_benchmark(cr: &mut Criterion) { }); } -pub fn par_matrix_hadamard_benchmark(cr: &mut Criterion) { - let v: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 2.0 * (i as f64)) - .collect::>(); - - let w: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 3.0 * (i as f64)) - .collect::>(); - - // Result: 1000x1000 matrix: 5.8664 ms - cr.bench_function("ser_matrix_hadamard_bench", |b| { - b.iter(|| { - black_box(matrix(v.clone(), 1000, 1000, Shape::Row).hadamard(&matrix( - w.clone(), - 1000, - 1000, - Shape::Row, - ))) - }) - }); - - // Result: 1000x1000 matrix: 13.982 ms - cr.bench_function("par_matrix_hadamard_bench", |b| { - b.iter(|| { - black_box( - matrix(v.clone(), 1000, 1000, Shape::Row).par_hadamard(&matrix( - w.clone(), - 1000, - 1000, - Shape::Row, - )), - ) - }) - }); -} - -pub fn par_matrix_take_row_benchmark(cr: &mut Criterion) { - let v: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 2.0 * (i as f64)) - .collect::>(); - - // Result: 1000x1000 matrix: 3.7958 ms - cr.bench_function("ser_matrix_take_row_bench", |b| { - b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).take_row(500))) - }); - - // Result: 1000x1000 matrix: 9.7897 ms - cr.bench_function("par_matrix_take_row_bench", |b| { - b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_take_row(500))) - }); -} - -pub fn par_matrix_fpmap_benchmark(cr: &mut Criterion) { - let v: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 2.0 * (i as f64)) - .collect::>(); - let f = |x: f64| 2.0 * x; - - // Result: 1000x1000 matrix: 3.3385 ms - cr.bench_function("ser_matrix_fpmap_bench", |b| { - b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).fmap(f))) - }); - - // Result: 1000x1000 matrix: 9.6310 ms - cr.bench_function("par_matrix_fpmap_bench", |b| { - b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_fmap(f))) - }); -} - -pub fn par_matrix_reduce_benchmark(cr: &mut Criterion) { - let v: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 2.0 * (i as f64)) - .collect::>(); - let f = |x: f64, y: f64| 2.0 * x * y; - - // Result: 1000x1000 matrix: 2.8077 ms - cr.bench_function("ser_matrix_reduce_bench", |b| { - b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).reduce(0.0, f))) - }); - - // Result: 1000x1000 matrix: 9.4877 ms - cr.bench_function("par_matrix_reduce_bench", |b| { - b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row).par_reduce(0.0, f))) - }); -} - criterion_group!( benches, par_matrix_benchmark, - par_py_matrix_benchmark, - par_matrix_change_shape_benchmark, - par_matrix_extract_row_benchmark, par_matrix_from_index_benchmark, - par_matrix_to_vec_and_diag_benchmark, - par_matrix_submat_benchmark, - par_matrix_add_vec_benchmark, par_matrix_norm_lpq_benchmark, - par_matrix_norm_f_benchmark, par_matrix_norm_l1_benchmark, par_matrix_inner_prod_benchmark, - par_matrix_hadamard_benchmark, - par_matrix_take_row_benchmark, - par_matrix_fpmap_benchmark, - par_matrix_reduce_benchmark, ); criterion_main!(benches); diff --git a/src/structure/matrix.rs b/src/structure/matrix.rs index d671156e..b66267e6 100644 --- a/src/structure/matrix.rs +++ b/src/structure/matrix.rs @@ -610,18 +610,14 @@ extern crate lapack; use blas::{daxpy, dgemm, dgemv}; #[cfg(feature = "O3")] use lapack::{dgecon, dgeqrf, dgesvd, dgetrf, dgetri, dgetrs, dorgqr, dpotrf}; +use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use crate::rayon::prelude::*; -use crate::traits::fp::ParallelFPMatrix; -use crate::traits::math::{ - ParallelInnerProduct, ParallelMatrixProduct, ParallelNormed, ParallelVector, -}; - pub use self::Shape::{Col, Row}; use crate::numerical::eigen::{eigen, EigenMethod}; use crate::structure::dataframe::{Series, TypedVector}; +use crate::traits::math::{ParallelInnerProduct, ParallelNormed}; use crate::traits::sugar::ScalableMut; use crate::traits::{ fp::{FPMatrix, FPVector}, @@ -760,14 +756,6 @@ where matrix(v, r, c, shape) } -/// R-like matrix constructor (Explicit ver.) in parallel -pub fn par_r_matrix(v: Vec, r: usize, c: usize, shape: Shape) -> Matrix -where - T: Into + Send + Sync, -{ - par_matrix(v, r, c, shape) -} - /// Python-like matrix constructor /// /// # Examples @@ -798,38 +786,6 @@ where matrix(data, r, c, Row) } -/// Python-like matrix constructor in parallel -/// -/// # Examples -/// ``` -/// #[macro_use] -/// extern crate peroxide; -/// use peroxide::fuga::*; -/// -/// fn main() { -/// let a = par_py_matrix(vec![c!(1,2), c!(3,4)]); -/// let b = matrix(c!(1,2,3,4), 2, 2, Row); -/// assert_eq!(a, b); -/// } -/// ``` -pub fn par_py_matrix(v: Vec>) -> Matrix -where - T: Into + Copy + Send + Sync, -{ - let r = v.len(); - let c = v[0].len(); - let data = (0..r) - .into_par_iter() - .flat_map(|i| { - (0..c) - .into_par_iter() - .map(|j| v[i][j].into()) - .collect::>() - }) - .collect::>(); - par_matrix(data, r, c, Row) -} - /// Matlab-like matrix constructor /// /// # Examples @@ -852,7 +808,6 @@ pub fn ml_matrix(s: &str) -> Matrix where { .map(|x| x.trim().split(' ').collect::>()) .collect::>>(); let c = str_data[0].len(); - let data = str_data .into_iter() .flat_map(|t| { @@ -861,45 +816,9 @@ pub fn ml_matrix(s: &str) -> Matrix where { .collect::>() }) .collect::>(); - matrix(data, r, c, Row) } -/// Matlab-like matrix constructor in parallel -/// -/// # Examples -/// ``` -/// #[macro_use] -/// extern crate peroxide; -/// use peroxide::fuga::*; -/// -/// fn main() { -/// let a = par_ml_matrix("1 2; 3 4"); -/// let b = matrix(c!(1,2,3,4), 2, 2, Row); -/// assert_eq!(a, b); -/// } -/// ``` -pub fn par_ml_matrix(s: &str) -> Matrix where { - let str_rows: Vec<&str> = s.split(';').collect(); - let r = str_rows.len(); - let str_data = str_rows - .into_par_iter() - .map(|x| x.trim().split(' ').collect::>()) - .collect::>>(); - let c = str_data[0].len(); - - let data = str_data - .into_par_iter() - .flat_map(|t| { - t.into_par_iter() - .map(|x| x.parse::().unwrap()) - .collect::>() - }) - .collect::>(); - - par_matrix(data, r, c, Row) -} - /// Pretty Print impl fmt::Display for Matrix { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -911,17 +830,9 @@ impl fmt::Display for Matrix { impl PartialEq for Matrix { fn eq(&self, other: &Matrix) -> bool { if self.shape == other.shape { - // self.data - // .clone() - // .into_iter() - // .zip(other.data.clone()) - // .all(|(x, y)| nearly_eq(x, y)) - // && self.row == other.row - - // Note: the sequential version in this case can be replaced by the parallel version? self.data .clone() - .into_par_iter() + .into_iter() .zip(other.data.clone()) .all(|(x, y)| nearly_eq(x, y)) && self.row == other.row @@ -1015,7 +926,7 @@ impl Matrix { } } - /// Change Bindings in Parallel + /// Change Bindings Mutably /// /// `Row` -> `Col` or `Col` -> `Row` /// @@ -1025,50 +936,9 @@ impl Matrix { /// /// let a = matrix(vec![1,2,3,4],2,2,Row); /// assert_eq!(a.shape, Row); - /// let b = a.par_change_shape(); + /// let b = a.change_shape(); /// assert_eq!(b.shape, Col); /// ``` - pub fn par_change_shape(&self) -> Self { - let r = self.row; - let c = self.col; - assert_eq!(r * c, self.data.len()); - let l = r * c - 1; - let ref_data = &self.data; - - match self.shape { - Row => { - let mut data = (0..=l) - .into_par_iter() - .map(|i| ref_data[(i * c) % l]) - .collect::>(); - data[l] = ref_data[l]; - par_matrix(data, r, c, Col) - } - Col => { - let mut data = (0..=l) - .into_par_iter() - .map(|i| ref_data[(i * r) % l]) - .collect::>(); - data[l] = ref_data[l]; - par_matrix(data, r, c, Row) - } - } - } - - /// Change Bindings Mutably - /// - /// `Row` -> `Col` or `Col` -> `Row` - /// - /// # Examples - /// ``` - /// use peroxide::fuga::*; - /// - /// let mut a = matrix(vec![1,2,3,4,5,6,7,8,9],3,3,Row); - /// assert_eq!(a.shape, Row); - /// a.change_shape_mut(); - /// assert_eq!(a, matrix(vec![1,4,7,2,5,8,3,6,9],3,3,Col)); - /// assert_eq!(a.shape, Col); - /// ``` pub fn change_shape_mut(&mut self) { let r = self.row; let c = self.col; @@ -1096,47 +966,6 @@ impl Matrix { } } - /// Change Bindings Mutably in Parallel - /// - /// `Row` -> `Col` or `Col` -> `Row` - /// - /// # Examples - /// ``` - /// use peroxide::fuga::*; - /// - /// let mut a = matrix(vec![1,2,3,4,5,6,7,8,9],3,3,Row); - /// assert_eq!(a.shape, Row); - /// a.par_change_shape_mut(); - /// assert_eq!(a, matrix(vec![1,4,7,2,5,8,3,6,9],3,3,Col)); - /// assert_eq!(a.shape, Col); - /// ``` - pub fn par_change_shape_mut(&mut self) { - let r = self.row; - let c = self.col; - assert_eq!(r * c, self.data.len()); - let l = r * c - 1; - let ref_data = self.data.clone(); - - match self.shape { - Row => { - self.data = (0..=l) - .into_par_iter() - .map(|i| ref_data[(i * c) % l]) - .collect::>(); - self.data[l] = ref_data[l]; - self.shape = Col; - } - Col => { - self.data = (0..=l) - .into_par_iter() - .map(|i| ref_data[(i * r) % l]) - .collect::>(); - self.data[l] = ref_data[l]; - self.shape = Row; - } - } - } - /// Spread data(1D vector) to 2D formatted String /// /// # Examples @@ -1246,29 +1075,6 @@ impl Matrix { container } - /// Extract Column in parallel - /// - /// # Examples - /// ``` - /// #[macro_use] - /// extern crate peroxide; - /// use peroxide::fuga::*; - /// - /// fn main() { - /// let a = matrix(c!(1,2,3,4), 2, 2, Row); - /// assert_eq!(a.par_col(0), c!(1,3)); - /// } - /// ``` - pub fn par_col(&self, index: usize) -> Vec { - assert!(index < self.col); - - let container = (0..self.row) - .into_par_iter() - .map(|i| self[(i, index)]) - .collect::>(); - container - } - /// Extract Row /// /// # Examples @@ -1291,29 +1097,6 @@ impl Matrix { container } - /// Extract Row in Parallel - /// - /// # Examples - /// ``` - /// #[macro_use] - /// extern crate peroxide; - /// use peroxide::fuga::*; - /// - /// fn main() { - /// let a = matrix(c!(1,2,3,4), 2, 2, Row); - /// assert_eq!(a.par_row(0), c!(1,2)); - /// } - /// ``` - pub fn par_row(&self, index: usize) -> Vec { - assert!(index < self.row); - - let container = (0..self.col) - .into_par_iter() - .map(|i| self[(index, i)]) - .collect::>(); - container - } - /// Extract diagonal components /// /// # Examples @@ -1328,37 +1111,17 @@ impl Matrix { /// } /// ``` pub fn diag(&self) -> Vec { - assert_eq!(self.row, self.col); let mut container = vec![0f64; self.row]; - for i in 0..self.row { - container[i] = self.data[i * (self.col + 1)]; + let r = self.row; + let c = self.col; + assert_eq!(r, c); + let c2 = c + 1; + for i in 0..r { + container[i] = self.data[i * c2]; } container } - /// Extract diagonal components in parallel - /// - /// # Examples - /// ``` - /// #[macro_use] - /// extern crate peroxide; - /// use peroxide::fuga::*; - /// - /// fn main() { - /// let a = matrix!(1;4;1, 2, 2, Row); - /// assert_eq!(a.par_diag(), c!(1,4)); - /// } - /// ``` - pub fn par_diag(&self) -> Vec { - assert_eq!(self.row, self.col); - - let container = (0..self.row) - .into_par_iter() - .map(|i| self.data[i * (self.col + 1)]) - .collect::>(); - container - } - /// Transpose /// /// # Examples @@ -1366,8 +1129,7 @@ impl Matrix { /// use peroxide::fuga::*; /// /// let a = matrix(vec![1,2,3,4], 2, 2, Row); - /// println!("{}", a.transpose()); // [[1,3],[2,4]] - /// assert_eq!(a.transpose(), matrix(vec![1,3,2,4], 2, 2, Row)) + /// println!("{}", a); // [[1,3],[2,4]] /// ``` pub fn transpose(&self) -> Self { match self.shape { @@ -1376,23 +1138,6 @@ impl Matrix { } } - /// Transpose in Parallel - /// - /// # Examples - /// ``` - /// use peroxide::fuga::*; - /// - /// let a = par_matrix(vec![1,2,3,4], 2, 2, Row); - /// println!("{}", a.par_transpose()); // [[1,3],[2,4]] - /// assert_eq!(a.par_transpose(), matrix(vec![1,3,2,4], 2, 2, Row)) - /// ``` - pub fn par_transpose(&self) -> Self { - match self.shape { - Row => par_matrix(self.data.clone(), self.col, self.row, Col), - Col => par_matrix(self.data.clone(), self.col, self.row, Row), - } - } - /// R-like transpose function /// /// # Examples @@ -1410,23 +1155,6 @@ impl Matrix { self.transpose() } - /// R-like transpose function in parallel - /// - /// # Examples - /// ``` - /// #[macro_use] - /// extern crate peroxide; - /// use peroxide::fuga::*; - /// - /// fn main() { - /// let a = matrix!(1;4;1, 2, 2, Row); - /// assert_eq!(a.transpose(), a.par_t()); - /// } - /// ``` - pub fn par_t(&self) -> Self { - self.par_transpose() - } - /// Write to CSV /// /// # Examples @@ -1671,28 +1399,6 @@ impl Matrix { result } - /// Matrix to `Vec>` in parallel - /// - /// To send `Matrix` to `inline-python` - pub fn par_to_vec(&self) -> Vec> { - (0..self.row) - .into_par_iter() - .map(|i| self.row(i)) - .collect::>>() - } - - /// To diagonal components - /// - /// # Examples - /// ``` - /// #[macro_use] - /// extern crate peroxide; - /// use peroxide::fuga::*; - /// - /// fn main() { - /// let a = matrix!(1;4;1, 2, 2, Row); - /// assert_eq!(a.to_diag(), matrix(c!(1,0,0,4), 2, 2, Row)); - /// } pub fn to_diag(&self) -> Matrix { assert_eq!(self.row, self.col, "Should be square matrix"); let mut result = matrix(vec![0f64; self.row * self.col], self.row, self.col, Row); @@ -1703,34 +1409,6 @@ impl Matrix { result } - /// To diagonal components in parallel - /// - /// # Examples - /// ``` - /// #[macro_use] - /// extern crate peroxide; - /// use peroxide::fuga::*; - /// - /// fn main() { - /// let a = matrix!(1;4;1, 2, 2, Row); - /// assert_eq!(a.par_to_diag(), matrix(c!(1,0,0,4), 2, 2, Row)); - /// } - pub fn par_to_diag(&self) -> Matrix { - assert_eq!(self.row, self.col, "Should be square matrix"); - - let data = (0..self.row) - .into_par_iter() - .flat_map(|i| { - (0..self.col) - .into_par_iter() - .map(|j| if i == j { self.diag()[i] } else { 0_f64 }) - .collect::>() - }) - .collect::>(); - - par_matrix(data, self.row, self.col, Row) - } - /// Submatrix /// /// # Description @@ -1770,49 +1448,6 @@ impl Matrix { result } - /// Submatrix in parallel - /// - /// # Description - /// Return below elements of matrix to new matrix - /// - /// $$ - /// \begin{pmatrix} - /// \\ddots & & & & \\\\ - /// & start & \\cdots & end.1 & \\\\ - /// & \\vdots & \\ddots & \\vdots & \\\\ - /// & end.0 & \\cdots & end & \\\\ - /// & & & & \\ddots - /// \end{pmatrix} - /// $$ - /// - /// # Examples - /// ``` - /// extern crate peroxide; - /// use peroxide::fuga::*; - /// - /// fn main() { - /// let a = ml_matrix("1 2 3;4 5 6;7 8 9"); - /// let b = ml_matrix("5 6;8 9"); - /// let c = a.par_submat((1, 1), (2, 2)); - /// assert_eq!(b, c); - /// } - /// ``` - pub fn par_submat(&self, start: (usize, usize), end: (usize, usize)) -> Matrix { - let row = end.0 - start.0 + 1; - let col = end.1 - start.1 + 1; - - let data = (0..row) - .into_par_iter() - .flat_map(|i| { - (0..col) - .into_par_iter() - .map(|j| self[(start.0 + i, start.1 + j)]) - .collect::>() - }) - .collect::>(); - par_matrix(data, row, col, self.shape) - } - /// Substitute matrix to specific position /// /// # Description @@ -1961,106 +1596,8 @@ impl Vector for Matrix { } } -impl ParallelVector for Matrix { - type Scalar = f64; - - fn par_add_vec(&self, other: &Self) -> Self { - assert_eq!(self.row, other.row); - assert_eq!(self.col, other.col); - - match () { - #[cfg(feature = "O3")] - () => { - if self.shape != other.shape { - return self.add(&other.change_shape()); - } - let x = &self.data; - let mut y = other.data.clone(); - let n_i32 = x.len() as i32; - let a_f64 = 1f64; - unsafe { - daxpy(n_i32, a_f64, x, 1, &mut y, 1); - } - matrix(y, self.row, self.col, self.shape) - } - _ => { - let mut result = par_matrix(self.data.clone(), self.row, self.col, self.shape); - result - .data - .par_iter_mut() - .enumerate() - .for_each(|(idx, value)| { - let i = idx / self.col; - let j = idx % self.col; - - *value += other[(i, j)]; - }); - - result - } - } - } - - fn par_sub_vec(&self, other: &Self) -> Self { - assert_eq!(self.row, other.row); - assert_eq!(self.col, other.col); - match () { - #[cfg(feature = "O3")] - () => { - if self.shape != other.shape { - return self.sub(&other.change_shape()); - } - let x = &other.data; - let mut y = self.data.clone(); - let n_i32 = x.len() as i32; - let a_f64 = -1f64; - unsafe { - daxpy(n_i32, a_f64, x, 1, &mut y, 1); - } - matrix(y, self.row, self.col, self.shape) - } - _ => { - let mut result = par_matrix(self.data.clone(), self.row, self.col, self.shape); - result - .data - .par_iter_mut() - .enumerate() - .for_each(|(idx, val)| { - let i = idx / self.col; - let j = idx % self.col; - *val -= other[(i, j)]; - }); - - result - } - } - } - - fn par_mul_scalar(&self, other: Self::Scalar) -> Self { - match () { - #[cfg(feature = "O3")] - () => { - let x = &self.data; - let mut y = vec![0f64; x.len()]; - let a_f64 = other; - let n_i32 = x.len() as i32; - - unsafe { - daxpy(n_i32, a_f64, x, 1, &mut y, 1); - } - matrix(y, self.row, self.col, self.shape) - } - _ => { - let scalar = other; - self.par_fmap(|x| x * scalar) - } - } - } -} - impl Normed for Matrix { type UnsignedScalar = f64; - fn norm(&self, kind: Norm) -> f64 { match kind { Norm::F => { @@ -2133,12 +1670,10 @@ impl ParallelNormed for Matrix { fn par_norm(&self, kind: Norm) -> f64 { match kind { Norm::F => { - let s = self - .data - .clone() - .into_par_iter() - .fold(|| 0_f64, |acc, el| acc + el.powi(2)) - .sum::(); + let mut s = 0f64; + for i in 0..self.data.len() { + s += self.data[i].powi(2); + } s.sqrt() } Norm::Lpq(p, q) => { @@ -2175,7 +1710,7 @@ impl ParallelNormed for Matrix { Col => self.change_shape().norm(Norm::LInf), Row => { for r in 0..self.row { - let s = self.row(r).par_iter().sum(); + let s = self.row(r).iter().sum(); if s > m { m = s; } @@ -2279,6 +1814,7 @@ impl MatrixProduct for Matrix { let r = self.row; let c = self.col; + let mut m = matrix(vec![0f64; r * c], r, c, self.shape); for i in 0..r { for j in 0..c { @@ -2289,24 +1825,6 @@ impl MatrixProduct for Matrix { } } -impl ParallelMatrixProduct for Matrix { - fn par_hadamard(&self, other: &Self) -> Matrix { - assert_eq!(self.row, other.row); - assert_eq!(self.col, other.col); - - let m = (0..self.row) - .into_par_iter() - .flat_map(|i| { - (0..self.col) - .into_par_iter() - .map(|j| self[(i, j)] * other[(i, j)]) - .collect::>() - }) - .collect::>(); - par_matrix(m, self.row, self.col, self.shape) - } -} - // ============================================================================= // Common Properties of Matrix & Vec // ============================================================================= @@ -2378,20 +1896,6 @@ impl Add for Matrix { } } result - - // Note: Parallel version - how to add the implementation below? - // let mut result = par_matrix(self.data.clone(), self.row, self.col, self.shape); - // result - // .data - // .par_iter_mut() - // .enumerate() - // .for_each(|(idx, value)| { - // let i = idx / self.col; - // let j = idx % self.col; - - // *value += other[(i, j)]; - // }); - // result } } } @@ -2420,7 +1924,7 @@ impl<'a, 'b> Add<&'b Matrix> for &'a Matrix { /// ``` impl Add for Matrix where - T: Into + Copy + Send + Sync, + T: Into + Copy, { type Output = Self; fn add(self, other: T) -> Self { @@ -2443,7 +1947,7 @@ where /// Element-wise addition between &Matrix & f64 impl<'a, T> Add for &'a Matrix where - T: Into + Copy + Send + Sync, + T: Into + Copy, { type Output = Matrix; fn add(self, other: T) -> Self::Output { @@ -2583,11 +2087,6 @@ impl Neg for Matrix { } _ => matrix( self.data.into_iter().map(|x: f64| -x).collect::>(), - // Note: how to add parallel implementation below? - // self.data - // .into_par_iter() - // .map(|x: f64| -x) - // .collect::>(), self.row, self.col, self.shape, @@ -2618,12 +2117,6 @@ impl<'a> Neg for &'a Matrix { .into_iter() .map(|x: f64| -x) .collect::>(), - // Note: how to add parallel implementation below? - // self.data - // .clone() - // .into_par_iter() - // .map(|x: f64| -x) - // .collect::>(), self.row, self.col, self.shape, @@ -2675,19 +2168,6 @@ impl Sub for Matrix { } } result - // Note: how to add parallel implementation below? - // let mut result = par_matrix(self.data.clone(), self.row, self.col, self.shape); - // result - // .data - // .par_iter_mut() - // .enumerate() - // .for_each(|(idx, value)| { - // let i = idx / self.col; - // let j = idx % self.col; - - // *value -= other[(i, j)]; - // }); - // result } } } @@ -2704,7 +2184,7 @@ impl<'a, 'b> Sub<&'b Matrix> for &'a Matrix { /// Subtraction between Matrix & f64 impl Sub for Matrix where - T: Into + Copy + Send + Sync, + T: Into + Copy, { type Output = Self; @@ -2728,7 +2208,7 @@ where /// Subtraction between &Matrix & f64 impl<'a, T> Sub for &'a Matrix where - T: Into + Copy + Send + Sync, + T: Into + Copy, { type Output = Matrix; @@ -3299,9 +2779,11 @@ impl FPMatrix for Matrix { F: Fn(Vec) -> Vec, { let mut result = matrix(vec![0f64; self.row * self.col], self.row, self.col, Col); + for i in 0..self.col { result.subs_col(i, &f(self.col(i))); } + result } @@ -3323,9 +2805,11 @@ impl FPMatrix for Matrix { F: Fn(Vec) -> Vec, { let mut result = matrix(vec![0f64; self.row * self.col], self.row, self.col, Row); + for i in 0..self.row { result.subs_row(i, &f(self.row(i))); } + result } @@ -3362,7 +2846,7 @@ impl FPMatrix for Matrix { fn reduce(&self, init: T, f: F) -> f64 where F: Fn(f64, f64) -> f64, - T: Into + Copy + Clone, + T: Into, { self.data.iter().fold(init.into(), |x, y| f(x, *y)) } @@ -3408,135 +2892,6 @@ impl FPMatrix for Matrix { } } -impl ParallelFPMatrix for Matrix { - fn par_take_row(&self, n: usize) -> Self { - if n >= self.row { - return self.clone(); - } - match self.shape { - Row => { - let new_data = self - .data - .clone() - .into_par_iter() - .take(n * self.col) - .collect::>(); - par_matrix(new_data, n, self.col, Row) - } - Col => { - let temp_data = (0..n) - .into_par_iter() - .flat_map(|i| self.row(i)) - .collect::>(); - par_matrix(temp_data, n, self.col, Row) - } - } - } - - fn par_take_col(&self, n: usize) -> Self { - if n >= self.col { - return self.clone(); - } - match self.shape { - Col => { - let new_data = self - .data - .clone() - .into_par_iter() - .take(n * self.row) - .collect::>(); - par_matrix(new_data, self.row, n, Col) - } - Row => { - let temp_data = (0..n) - .into_par_iter() - .flat_map(|i| self.col(i)) - .collect::>(); - par_matrix(temp_data, self.row, n, Col) - } - } - } - - fn par_skip_row(&self, n: usize) -> Self { - assert!(n < self.row, "Skip range is larger than row of matrix"); - - let temp_data = (n..self.row) - .into_par_iter() - .flat_map(|i| self.row(i)) - .collect::>(); - par_matrix(temp_data, self.row - n, self.col, Row) - } - - fn par_skip_col(&self, n: usize) -> Self { - assert!(n < self.col, "Skip range is larger than col of matrix"); - - let temp_data = (n..self.col) - .into_par_iter() - .flat_map(|i| self.col(i)) - .collect::>(); - par_matrix(temp_data, self.row, self.col - n, Col) - } - - fn par_fmap(&self, f: F) -> Matrix - where - F: Fn(f64) -> f64 + Send + Sync, - { - let result = self.data.par_iter().map(|x| f(*x)).collect::>(); - par_matrix(result, self.row, self.col, self.shape) - } - - fn par_reduce(&self, init: T, f: F) -> f64 - where - F: Fn(f64, f64) -> f64 + Send + Sync, - T: Into + Send + Sync + Copy + Clone, - { - self.data - .clone() - .into_par_iter() - .fold(|| init.into(), |acc, y| f(acc, y)) - .sum() - } - - fn par_zip_with(&self, f: F, other: &Matrix) -> Self - where - F: Fn(f64, f64) -> f64 + Send + Sync, - { - assert_eq!(self.data.len(), other.data.len()); - let mut a = other.clone(); - if self.shape != other.shape { - a = a.change_shape(); - } - - let result = self - .data - .par_iter() - .zip(a.data.par_iter()) - .map(|(x, y)| f(*x, *y)) - .collect::>(); - par_matrix(result, self.row, self.col, self.shape) - } - - fn par_col_reduce(&self, f: F) -> Vec - where - F: Fn(Vec) -> f64 + Send + Sync, - { - (0..self.col) - .into_par_iter() - .map(|i| f(self.col(i))) - .collect::>() - } - - fn par_row_reduce(&self, f: F) -> Vec - where - F: Fn(Vec) -> f64 + Send + Sync, - { - (0..self.row) - .into_par_iter() - .map(|i| f(self.row(i))) - .collect::>() - } -} - // ============================================================================= // Linear Algebra // ============================================================================= @@ -3570,20 +2925,6 @@ pub fn diag(n: usize) -> Matrix { matrix(v, n, n, Row) } -pub fn par_diag(n: usize) -> Matrix { - let v = (0..n * n) - .into_par_iter() - .map(|i| { - if i % (n + 1) == 0 { - 1_f64 // Set diagonal elements to 1 - } else { - 0_f64 // All other elements are 0 - } - }) - .collect::>(); - par_matrix(v, n, n, Row) -} - /// Data structure for Complete Pivoting LU decomposition /// /// # Usage From f6ce0d5a07f1deb94d14cc373ab4414ebbe1fdfa Mon Sep 17 00:00:00 2001 From: Soumya Date: Mon, 14 Oct 2024 16:43:38 +0200 Subject: [PATCH 14/17] Removed ParallelAlgorithm trait - Removed methods in ParallelAlgorithm trait since they don't seem to offer significant benefits over their serial counterparts in the current implementation. - Removed some ParallelFPVector trait methods for same reason as above --- src/structure/vector.rs | 230 ---------------------------------------- src/traits/fp.rs | 17 --- src/traits/general.rs | 13 --- 3 files changed, 260 deletions(-) diff --git a/src/structure/vector.rs b/src/structure/vector.rs index aaa939d5..438c0a1d 100644 --- a/src/structure/vector.rs +++ b/src/structure/vector.rs @@ -271,7 +271,6 @@ use crate::rayon::prelude::*; use crate::structure::matrix::{matrix, Matrix, Row}; use crate::traits::fp::ParallelFPVector; -use crate::traits::general::ParallelAlgorithm; use crate::traits::math::{ParallelInnerProduct, ParallelNormed, ParallelVectorProduct}; use crate::traits::mutable::ParallelMutFP; use crate::traits::{ @@ -506,65 +505,6 @@ impl ParallelFPVector for Vec { .filter(|x| f(*x)) .collect::>() } - - /// Take for `Vec` in Parallel - /// - /// # Examples - /// ``` - /// #[macro_use] - /// extern crate peroxide; - /// use peroxide::fuga::*; - /// use peroxide::traits::fp::ParallelFPVector; - /// - /// fn main() { - /// let a = c!(1,2,3,4,5); - /// let b = a.par_take(3); - /// assert_eq!(b, c!(1,2,3)); - /// } - /// ``` - fn par_take(&self, n: usize) -> Vec { - let mut v = vec![0_f64; n]; - v[..n] - .par_iter_mut() - .zip(self[..n].par_iter()) - .for_each(|(dest, src)| { - *dest = *src; - }); // parallel equivalent of copy_from_slice() - v - } - - /// Skip for `Vec` in Parallel - /// - /// # Examples - /// ``` - /// #[macro_use] - /// extern crate peroxide; - /// use peroxide::fuga::*; - /// use peroxide::traits::fp::ParallelFPVector; - /// - /// fn main() { - /// let a = c!(1,2,3,4,5); - /// let b = a.par_skip(3); - /// assert_eq!(b, c!(4,5)); - /// } - /// ``` - fn par_skip(&self, n: usize) -> Vec { - let l = self.len(); - let mut v = vec![0f64; l - n]; - v[..(l - n)] - .par_iter_mut() - .zip(self[n..l].par_iter()) - .for_each(|(dest, src)| *dest = *src); - v - } - - fn par_sum(&self) -> f64 { - self.par_iter().sum() - } - - fn par_prod(&self) -> f64 { - self.par_iter().product() - } } /// Explicit version of `map` @@ -836,176 +776,6 @@ impl Algorithm for Vec { } } -impl ParallelAlgorithm for Vec { - /// Assign rank in parallel - /// - /// # Examples - /// ``` - /// #[macro_use] - /// extern crate peroxide; - /// use peroxide::fuga::*; - /// use peroxide::traits::general::ParallelAlgorithm; - /// - /// fn main() { - /// let v = c!(7, 5, 9, 2, 8); - /// assert_eq!(v.par_rank(), vec![2,3,0,4,1]); - /// } - /// ``` - fn par_rank(&self) -> Vec { - let l = self.len(); - let idx = (1..(l + 1)).collect::>(); - - let mut vec_tup = self - .clone() - .into_par_iter() - .zip(idx.clone()) - .collect::>(); - vec_tup.sort_by(|x, y| x.0.partial_cmp(&y.0).unwrap().reverse()); - let indices = vec_tup - .into_par_iter() - .map(|(_, y)| y) - .collect::>(); - idx.into_par_iter() - .map(|x| { - indices - .clone() - .into_par_iter() - .position_any(|t| t == x) // Note: position() is deprecated by rayon - .unwrap() - }) - .collect::>() - } - - /// arg max in Parallel - /// - /// # Examples - /// ``` - /// #[macro_use] - /// extern crate peroxide; - /// use peroxide::fuga::*; - /// use peroxide::traits::general::ParallelAlgorithm; - /// - /// fn main() { - /// let v = c!(1,3,2,4,3,7); - /// assert_eq!(v.par_arg_max(),5); - /// - /// let v2 = c!(1,3,2,5,6,6); - /// assert_eq!(v2.par_arg_max(),4); - /// } - /// ``` - fn par_arg_max(&self) -> usize { - #[cfg(feature = "O3")] - { - let n_i32 = self.len() as i32; - let idx: usize; - unsafe { - idx = idamax(n_i32, self, 1) - 1; - } - idx - } - #[cfg(not(feature = "O3"))] - { - self.into_par_iter() - .enumerate() - .fold( - || (0usize, f64::MIN), - |acc, (ics, &val)| { - if acc.1 < val { - (ics, val) - } else { - acc - } - }, - ) - .reduce( - // Note: need reduce and can not simply do .max() because rayon::..max() requires Ord, which is stricter than PartialOrd - // Hence combine the results from the parallel fold - || (0usize, f64::MIN), // Identity element - |acc1, acc2| { - if acc1.1 < acc2.1 { - acc2 // Return the pair with the larger value - } else { - acc1 - } - }, - ) - .0 - } - } - - /// arg min in Parallel - /// - /// # Examples - /// ``` - /// #[macro_use] - /// extern crate peroxide; - /// use peroxide::fuga::*; - /// use peroxide::traits::general::ParallelAlgorithm; - /// - /// fn main() { - /// let v = c!(1,3,2,4,3,7); - /// assert_eq!(v.par_arg_min(),0); - /// - /// let v2 = c!(4,3,2,5,1,6); - /// assert_eq!(v2.par_arg_min(),4); - /// } - fn par_arg_min(&self) -> usize { - self.into_par_iter() - .enumerate() - .fold( - || (0usize, f64::MAX), - |acc, (ics, &val)| { - if acc.1 > val { - (ics, val) - } else { - acc - } - }, - ) - .reduce( - || (0usize, f64::MAX), - |acc1, acc2| { - if acc1.1 > acc2.1 { - acc2 // Return the pair with the smaller value - } else { - acc1 - } - }, - ) - .0 - } - - fn par_max(&self) -> f64 { - #[cfg(feature = "O3")] - { - let n_i32 = self.len() as i32; - let idx: usize; - unsafe { - idx = idamax(n_i32, self, 1) - 1; - } - self[idx] - } - #[cfg(not(feature = "O3"))] - { - self.into_par_iter() - .fold(|| f64::MIN, |acc, &val| if acc < val { val } else { acc }) - .reduce( - || f64::MIN, - |acc1, acc2| if acc1 < acc2 { acc2 } else { acc1 }, - ) - } - } - - fn par_min(&self) -> f64 { - self.into_par_iter() - .fold(|| f64::MAX, |acc, &val| if acc > val { val } else { acc }) - .reduce( - || f64::MAX, - |acc1, acc2| if acc1 > acc2 { acc2 } else { acc1 }, - ) - } -} - impl Vector for Vec { type Scalar = f64; diff --git a/src/traits/fp.rs b/src/traits/fp.rs index fbb66767..07782cb0 100644 --- a/src/traits/fp.rs +++ b/src/traits/fp.rs @@ -76,11 +76,6 @@ pub trait ParallelFPVector { fn par_filter(&self, f: F) -> Self where F: Fn(Self::Scalar) -> bool + Send + Sync; - - fn par_take(&self, n: usize) -> Self; - fn par_skip(&self, n: usize) -> Self; - fn par_sum(&self) -> Self::Scalar; - fn par_prod(&self) -> Self::Scalar; } /// Functional Programming for Matrix in Parallel (Uses Rayon crate) @@ -101,16 +96,4 @@ pub trait ParallelFPMatrix { fn par_row_reduce(&self, f: F) -> Vec where F: Fn(Vec) -> f64 + Send + Sync; - fn par_take_row(&self, n: usize) -> Matrix; - fn par_take_col(&self, n: usize) -> Matrix; - fn par_skip_row(&self, n: usize) -> Matrix; - fn par_skip_col(&self, n: usize) -> Matrix; - - // These are not in parallel: - // fn col_map(&self, f: F) -> Matrix - // where - // F: Fn(Vec) -> Vec; - // fn row_map(&self, f: F) -> Matrix - // where - // F: Fn(Vec) -> Vec; } diff --git a/src/traits/general.rs b/src/traits/general.rs index 7ec7fed1..2737f97b 100644 --- a/src/traits/general.rs +++ b/src/traits/general.rs @@ -8,16 +8,3 @@ pub trait Algorithm { fn min(&self) -> f64; fn swap_with_perm(&mut self, p: &Vec<(usize, usize)>); } - -/// Some algorithms for Vector in Parallel -pub trait ParallelAlgorithm { - fn par_rank(&self) -> Vec; - fn par_arg_max(&self) -> usize; - fn par_arg_min(&self) -> usize; - fn par_max(&self) -> f64; - fn par_min(&self) -> f64; - - // Not implemented in parallel: - // fn par_sign(&self) -> f64; - // fn par_swap_with_perm(&mut self, p: &Vec<(usize, usize)>); -} From a239f98ddf174124dade3a943881a614b6ad8f4a Mon Sep 17 00:00:00 2001 From: Soumya Date: Mon, 14 Oct 2024 16:57:15 +0200 Subject: [PATCH 15/17] Implemented par_reduce - Implemented par_reduce operation using reduce_with () from rayon crate --- src/structure/vector.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/src/structure/vector.rs b/src/structure/vector.rs index 438c0a1d..8d584942 100644 --- a/src/structure/vector.rs +++ b/src/structure/vector.rs @@ -454,21 +454,15 @@ impl ParallelFPVector for Vec { /// assert_eq!(a.par_reduce(0, |x,y| x + y), 5050f64); /// } /// ``` - fn par_reduce(&self, init: T, f: F) -> f64 + fn par_reduce(&self, _init: T, f: F) -> f64 where F: Fn(f64, f64) -> f64 + Send + Sync, T: Into + Send + Sync + Copy, { - self.iter().fold(init.into(), |x, &y| f(x, y)) - - // Parallel version unimplemented - - // self.par_iter() - // .cloned() - // .fold(|| init.into(), |x, y| f(x, y)) - // .sum::() // Combining fold and reduce to produce a single value - // // .reduce(|| init.into(), |x, y| f(x, y)) // can not use reduce instead of .sum(), since the fn f used might not be associative (e.g. check test_max_pool_1d) - // // https://docs.rs/rayon/latest/rayon/iter/trait.ParallelIterator.html#method.reduce + self.par_iter() + .cloned() + .reduce_with(|x, y| f(x, y)) + .expect("Unable to perform parallel reduce operation") } fn par_zip_with(&self, f: F, other: &Vec) -> Vec From 3737099dda63bb97014816003984bd868ab3e00d Mon Sep 17 00:00:00 2001 From: Soumya Date: Mon, 14 Oct 2024 17:11:21 +0200 Subject: [PATCH 16/17] Removed formatting from Cargo.toml file - Added rayon and criterion as dependencies --- Cargo.toml | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 39c51a7a..0c8f0cc7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,14 +10,7 @@ categories = ["science"] readme = "README.md" documentation = "https://axect.github.io/Peroxide_Doc" keywords = ["Numeric", "Science", "Dataframe", "Plot", "LinearAlgebra"] -exclude = [ - "example_data/", - "src/bin/", - # "benches/", - "example/", - "test_data/", - "peroxide-ad2", -] +exclude = ["example_data/", "src/bin/", "benches/", "example/", "test_data/", "peroxide-ad2"] [badges] travis-ci = { repository = "axect/peroxide" } @@ -40,27 +33,18 @@ anyhow = "1.0" paste = "1.0" #num-complex = "0.3" netcdf = { version = "0.7", optional = true, default-features = false } -pyo3 = { version = "0.22", optional = true, features = [ - "auto-initialize", - "gil-refs", -] } +pyo3 = { version = "0.22", optional = true, features = ["auto-initialize", "gil-refs"] } blas = { version = "0.22", optional = true } lapack = { version = "0.19", optional = true } serde = { version = "1.0", features = ["derive"], optional = true } json = { version = "0.12", optional = true } -arrow2 = { version = "0.18", features = [ - "io_parquet", - "io_parquet_compression", -], optional = true } +arrow2 = { version = "0.18", features = ["io_parquet", "io_parquet_compression"], optional = true } num-complex = { version = "0.4", optional = true } -lambert_w = { version = "0.3.0", default-features = false, features = [ - "24bits", - "50bits", -] } +lambert_w = { version = "0.3.0", default-features = false, features = ["24bits", "50bits"] } rayon = "1.10" [package.metadata.docs.rs] -rustdoc-args = ["--html-in-header", "katex-header.html", "--cfg", "docsrs"] +rustdoc-args = [ "--html-in-header", "katex-header.html", "--cfg", "docsrs"] [features] default = [] From 863ad6a5b34b247f100797fbd4ef9c99ef61c396 Mon Sep 17 00:00:00 2001 From: Soumya Date: Mon, 14 Oct 2024 17:21:19 +0200 Subject: [PATCH 17/17] Removed par_matrix implementation - par_matrix removed from matrix.rs and benchmark --- benches/parallel_rayon/matrix_benchmark.rs | 18 --------------- src/structure/matrix.rs | 27 +--------------------- 2 files changed, 1 insertion(+), 44 deletions(-) diff --git a/benches/parallel_rayon/matrix_benchmark.rs b/benches/parallel_rayon/matrix_benchmark.rs index eaaa1e13..86b39bc9 100644 --- a/benches/parallel_rayon/matrix_benchmark.rs +++ b/benches/parallel_rayon/matrix_benchmark.rs @@ -4,23 +4,6 @@ use peroxide::{ traits::math::{ParallelInnerProduct, ParallelNormed}, }; -pub fn par_matrix_benchmark(cr: &mut Criterion) { - let v: Vec = (0..1000000) - .into_iter() - .map(|i: i32| 2.0 * (i as f64)) - .collect::>(); - - // Result: 1000x1000 matrix: 630.92 µs - cr.bench_function("ser_matrix_bench", |b| { - b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row))) - }); - - // Result: 1000x1000 matrix: 9.6995 ms - cr.bench_function("par_matrix_bench", |b| { - b.iter(|| black_box(par_matrix(v.clone(), 1000, 1000, Shape::Row))) - }); -} - pub fn par_matrix_from_index_benchmark(cr: &mut Criterion) { let f = |x: usize, y: usize| 2.0 * (x as f64) * (y as f64); let size: (usize, usize) = (1000, 1000); @@ -112,7 +95,6 @@ pub fn par_matrix_inner_prod_benchmark(cr: &mut Criterion) { criterion_group!( benches, - par_matrix_benchmark, par_matrix_from_index_benchmark, par_matrix_norm_lpq_benchmark, par_matrix_norm_l1_benchmark, diff --git a/src/structure/matrix.rs b/src/structure/matrix.rs index b66267e6..226e7958 100644 --- a/src/structure/matrix.rs +++ b/src/structure/matrix.rs @@ -723,31 +723,6 @@ where } } -/// R-like matrix constructor in parallel -/// -/// # Examples -/// ``` -/// #[macro_use] -/// extern crate peroxide; -/// use peroxide::fuga::*; -/// -/// fn main() { -/// let a = par_matrix(c!(1,2,3,4), 2, 2, Row); -/// a.print(); // Print matrix -/// } -/// ``` -pub fn par_matrix(v: Vec, r: usize, c: usize, shape: Shape) -> Matrix -where - T: Into + Send + Sync, -{ - Matrix { - data: v.into_par_iter().map(|t| t.into()).collect::>(), - row: r, - col: c, - shape, - } -} - /// R-like matrix constructor (Explicit ver.) pub fn r_matrix(v: Vec, r: usize, c: usize, shape: Shape) -> Matrix where @@ -1385,7 +1360,7 @@ impl Matrix { .collect::>() }) .collect::>(); - par_matrix(data, row, col, Row) + matrix(data, row, col, Row) } /// Matrix to `Vec>`