diff --git a/Cargo.toml b/Cargo.toml index 89ef53a9..0c8f0cc7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ maintenance = { status = "actively-developed" } [dev-dependencies] float-cmp = "0.9" +criterion = { version = "0.5.1", features = ["html_reports"] } [dependencies] csv = { version = "1.3", optional = true, default-features = false } @@ -40,6 +41,7 @@ json = { version = "0.12", optional = true } arrow2 = { version = "0.18", features = ["io_parquet", "io_parquet_compression"], optional = true } num-complex = { version = "0.4", optional = true } lambert_w = { version = "0.3.0", default-features = false, features = ["24bits", "50bits"] } +rayon = "1.10" [package.metadata.docs.rs] rustdoc-args = [ "--html-in-header", "katex-header.html", "--cfg", "docsrs"] @@ -51,3 +53,8 @@ plot = ["pyo3"] nc = ["netcdf"] parquet = ["arrow2"] complex = ["num-complex", "matrixmultiply/cgemm"] + +[[bench]] +path = "benches/parallel_rayon/matrix_benchmark.rs" +name = "matrix_benchmark" +harness = false diff --git a/benches/data/rayon_matrix_benchmark_results.md b/benches/data/rayon_matrix_benchmark_results.md new file mode 100644 index 00000000..f85feae8 --- /dev/null +++ b/benches/data/rayon_matrix_benchmark_results.md @@ -0,0 +1,194 @@ +Lib used for benchmarking: Criterion +Matrix size: 1000x1000 + +Running benches/parallel_rayon/matrix_benchmark.rs + +ser_matrix_bench time: [535.12 µs 544.51 µs 556.68 µs] +Found 11 outliers among 100 measurements (11.00%) + 4 (4.00%) high mild + 7 (7.00%) high severe + +par_matrix_bench time: [5.0912 ms 5.1431 ms 5.1995 ms] +Found 7 outliers among 100 measurements (7.00%) + 1 (1.00%) low mild + 5 (5.00%) high mild + 1 (1.00%) high severe + +ser_py_matrix_bench time: [4.3100 ms 4.3309 ms 4.3544 ms] +Found 7 outliers among 100 measurements (7.00%) + 2 (2.00%) high mild + 5 (5.00%) high severe + +par_py_matrix_bench time: [11.667 ms 11.789 ms 11.920 ms] +Found 10 outliers among 100 measurements (10.00%) + 6 (6.00%) high mild + 4 (4.00%) high severe + +ser_matrix_change_shape_bench + time: [7.3630 ms 7.4075 ms 7.4608 ms] +Found 5 outliers among 100 measurements (5.00%) + 1 (1.00%) high mild + 4 (4.00%) high severe + +par_matrix_change_shape_bench + time: [10.276 ms 10.385 ms 10.499 ms] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +ser_matrix_extract_row_bench + time: [613.39 µs 622.44 µs 633.72 µs] +Found 7 outliers among 100 measurements (7.00%) + 7 (7.00%) high severe + +par_matrix_extract_row_bench + time: [5.4321 ms 5.4851 ms 5.5399 ms] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +ser_matrix_from_index_bench + time: [2.4174 ms 2.4490 ms 2.4851 ms] +Found 14 outliers among 100 measurements (14.00%) + 1 (1.00%) high mild + 13 (13.00%) high severe + +par_matrix_from_index_bench + time: [2.3912 ms 2.4090 ms 2.4304 ms] +Found 9 outliers among 100 measurements (9.00%) + 2 (2.00%) high mild + 7 (7.00%) high severe + +ser_matrix_to_vec_bench time: [2.4800 ms 2.5082 ms 2.5423 ms] +Found 10 outliers among 100 measurements (10.00%) + 4 (4.00%) high mild + 6 (6.00%) high severe + +par_matrix_to_vec_bench time: [6.4041 ms 6.4618 ms 6.5250 ms] +Found 6 outliers among 100 measurements (6.00%) + 5 (5.00%) high mild + 1 (1.00%) high severe + +ser_matrix_to_diag_bench + time: [2.4335 ms 2.4526 ms 2.4750 ms] +Found 14 outliers among 100 measurements (14.00%) + 6 (6.00%) high mild + 8 (8.00%) high severe + +par_matrix_to_diag_bench + time: [13.514 ms 13.684 ms 13.868 ms] +Found 10 outliers among 100 
measurements (10.00%) + 7 (7.00%) high mild + 3 (3.00%) high severe + +Benchmarking ser_matrix_submat_bench: Warming up for 3.0000 s +Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 8.3s, enable flat sampling, or reduce sample count to 50. +ser_matrix_submat_bench time: [1.6077 ms 1.6243 ms 1.6451 ms] +Found 16 outliers among 100 measurements (16.00%) + 3 (3.00%) high mild + 13 (13.00%) high severe + +par_matrix_submat_bench time: [10.611 ms 10.761 ms 10.942 ms] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe + +ser_matrix_add_vec_bench + time: [7.3077 ms 7.3485 ms 7.3946 ms] +Found 12 outliers among 100 measurements (12.00%) + 2 (2.00%) high mild + 10 (10.00%) high severe + +par_matrix_add_vec_bench + time: [11.331 ms 11.480 ms 11.636 ms] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +ser_matrix_norm_bench time: [5.1600 ms 5.1864 ms 5.2165 ms] +Found 7 outliers among 100 measurements (7.00%) + 1 (1.00%) high mild + 6 (6.00%) high severe + +par_matrix_norm_bench time: [2.6565 ms 2.6810 ms 2.7091 ms] +Found 5 outliers among 100 measurements (5.00%) + 2 (2.00%) high mild + 3 (3.00%) high severe + +Benchmarking ser_matrix_norm_bench #2: Warming up for 3.0000 s +Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 8.9s, enable flat sampling, or reduce sample count to 50. +ser_matrix_norm_bench #2 + time: [1.7262 ms 1.7391 ms 1.7541 ms] +Found 15 outliers among 100 measurements (15.00%) + 10 (10.00%) high mild + 5 (5.00%) high severe + +par_matrix_norm_bench #2 + time: [6.7071 ms 6.7883 ms 6.8703 ms] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +ser_matrix_norm_bench #3 + time: [9.7582 ms 9.9006 ms 10.057 ms] +Found 12 outliers among 100 measurements (12.00%) + 5 (5.00%) high mild + 7 (7.00%) high severe + +par_matrix_norm_bench #3 + time: [9.3004 ms 9.4088 ms 9.5239 ms] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +ser_matrix_inner_prod_bench + time: [5.2730 ms 5.3590 ms 5.4583 ms] +Found 14 outliers among 100 measurements (14.00%) + 3 (3.00%) high mild + 11 (11.00%) high severe + +par_matrix_inner_prod_bench + time: [5.0987 ms 5.1644 ms 5.2402 ms] +Found 7 outliers among 100 measurements (7.00%) + 3 (3.00%) high mild + 4 (4.00%) high severe + +ser_matrix_hadamard_bench + time: [5.6521 ms 5.6870 ms 5.7262 ms] +Found 12 outliers among 100 measurements (12.00%) + 3 (3.00%) high mild + 9 (9.00%) high severe + +par_matrix_hadamard_bench + time: [14.155 ms 14.335 ms 14.527 ms] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe + +ser_matrix_take_row_bench + time: [3.7894 ms 3.8234 ms 3.8613 ms] +Found 15 outliers among 100 measurements (15.00%) + 7 (7.00%) high mild + 8 (8.00%) high severe + +par_matrix_take_row_bench + time: [8.4008 ms 8.5171 ms 8.6523 ms] +Found 9 outliers among 100 measurements (9.00%) + 6 (6.00%) high mild + 3 (3.00%) high severe + +ser_matrix_fpmap_bench time: [3.2526 ms 3.2739 ms 3.2977 ms] +Found 12 outliers among 100 measurements (12.00%) + 2 (2.00%) high mild + 10 (10.00%) high severe + +par_matrix_fpmap_bench time: [10.604 ms 10.765 ms 10.937 ms] +Found 11 outliers among 100 measurements (11.00%) + 8 (8.00%) high mild + 3 (3.00%) high severe + +ser_matrix_reduce_bench time: [2.6748 ms 2.6964 ms 2.7201 ms] +Found 9 outliers among 100 measurements (9.00%) + 6 (6.00%) high mild + 3 (3.00%) high severe + +par_matrix_reduce_bench 
time: [6.2453 ms 6.3198 ms 6.4034 ms] +Found 6 outliers among 100 measurements (6.00%) + 4 (4.00%) high mild + 2 (2.00%) high severe diff --git a/benches/parallel_rayon/matrix_benchmark.rs b/benches/parallel_rayon/matrix_benchmark.rs new file mode 100644 index 00000000..86b39bc9 --- /dev/null +++ b/benches/parallel_rayon/matrix_benchmark.rs @@ -0,0 +1,103 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use peroxide::{ + fuga::*, + traits::math::{ParallelInnerProduct, ParallelNormed}, +}; + +pub fn par_matrix_from_index_benchmark(cr: &mut Criterion) { + let f = |x: usize, y: usize| 2.0 * (x as f64) * (y as f64); + let size: (usize, usize) = (1000, 1000); + + // Result: 1000x1000 matrix: 2.3662 ms + cr.bench_function("ser_matrix_from_index_bench", |b| { + b.iter(|| black_box(Matrix::from_index(f, size))) + }); + + // Result: 1000x1000 matrix: 2.3355 ms + cr.bench_function("par_matrix_from_index_bench", |b| { + b.iter(|| black_box(Matrix::from_index(f, size))) + }); +} + +// Check: better parallel results (ran test 6 times) +pub fn par_matrix_norm_lpq_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: [5.5969 ms 5.7555 ms 5.9515 ms 6.0843 ms 6.3072 ms 6.5636 ms] + cr.bench_function("ser_matrix_norm_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).norm(Norm::Lpq(4.0, 2.0)))) + }); + + // Result: 1000x1000 matrix: [3.1796 ms 3.2714 ms 3.3714 ms 3.6123 ms 3.7398 ms 3.8761 ms] + cr.bench_function("par_matrix_norm_bench", |b| { + b.iter(|| { + black_box(matrix(v.clone(), 1000, 1000, Shape::Row).par_norm(Norm::Lpq(4.0, 2.0))) + }) + }); +} + +pub fn par_matrix_norm_l1_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: 9.0287 ms + cr.bench_function("ser_matrix_norm_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).norm(Norm::L1))) + }); + + // Result: 1000x1000 matrix: 10.393 ms + cr.bench_function("par_matrix_norm_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).par_norm(Norm::L1))) + }); +} + +// Check: better parallel results (ran test 6 times) +pub fn par_matrix_inner_prod_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + let w: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 3.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: [5.1075 ms 5.1505 ms 5.2013 ms 5.7617 ms 6.0196 ms 6.3009 ms] + cr.bench_function("ser_matrix_inner_prod_bench", |b| { + b.iter(|| { + black_box(matrix(v.clone(), 1000, 1000, Shape::Row).dot(&matrix( + w.clone(), + 1000, + 1000, + Shape::Row, + ))) + }) + }); + + // Result: 1000x1000 matrix: [4.9931 ms 5.0244 ms 5.0642 ms 5.0322 ms 5.0819 ms 5.1404 ms] + cr.bench_function("par_matrix_inner_prod_bench", |b| { + b.iter(|| { + black_box(matrix(v.clone(), 1000, 1000, Shape::Row).par_dot(&matrix( + w.clone(), + 1000, + 1000, + Shape::Row, + ))) + }) + }); +} + +criterion_group!( + benches, + par_matrix_from_index_benchmark, + par_matrix_norm_lpq_benchmark, + par_matrix_norm_l1_benchmark, + par_matrix_inner_prod_benchmark, +); +criterion_main!(benches); diff --git a/src/lib.rs b/src/lib.rs index e1929555..916878b5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -208,3 +208,5 @@ pub mod util; #[cfg(feature = "complex")] pub mod complex; + +extern crate rayon; 
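
> Note (not part of the patch): in `matrix_benchmark.rs` above, both the `ser_matrix_from_index_bench` and `par_matrix_from_index_bench` closures call the serial `Matrix::from_index`, which matches the nearly identical timings in the results file (2.45 ms vs 2.41 ms). A minimal sketch of the comparison once the parallel closure targets the `Matrix::par_from_index` constructor introduced later in this diff:

```rust
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use peroxide::fuga::*;

// Sketch: benchmark the serial and parallel constructors against each other.
pub fn matrix_from_index_benchmark(cr: &mut Criterion) {
    let f = |x: usize, y: usize| 2.0 * (x as f64) * (y as f64);
    let size: (usize, usize) = (1000, 1000);

    cr.bench_function("ser_matrix_from_index_bench", |b| {
        b.iter(|| black_box(Matrix::from_index(f, size)))
    });

    // Uses the parallel constructor added in src/structure/matrix.rs below.
    cr.bench_function("par_matrix_from_index_bench", |b| {
        b.iter(|| black_box(Matrix::par_from_index(f, size)))
    });
}

criterion_group!(benches, matrix_from_index_benchmark);
criterion_main!(benches);
```

With the `[[bench]]` entry registered in `Cargo.toml` above (`harness = false`), Criterion runs this via `cargo bench --bench matrix_benchmark`.
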
diff --git a/src/numerical/integral.rs b/src/numerical/integral.rs index ac2165dd..d7650f3c 100644 --- a/src/numerical/integral.rs +++ b/src/numerical/integral.rs @@ -166,7 +166,7 @@ where match method { Integral::GaussLegendre(n) => gauss_legendre_quadrature(f, n, (a, b)), Integral::NewtonCotes(n) => newton_cotes_quadrature(f, n, (a, b)), - method => gauss_kronrod_quadrature(f, (a,b), method), + method => gauss_kronrod_quadrature(f, (a, b), method), } } @@ -217,9 +217,9 @@ where #[allow(non_snake_case)] pub fn gauss_kronrod_quadrature(f: F, (a, b): (T, S), method: Integral) -> f64 where - F: Fn(f64) -> f64 + Copy, - T: Into, - S: Into, + F: Fn(f64) -> f64 + Copy, + T: Into, + S: Into, { let (g, k) = method.get_gauss_kronrod_order(); let tol = method.get_tol(); @@ -234,13 +234,9 @@ where let G = gauss_legendre_quadrature(f, g as usize, (a, b)); let K = kronrod_quadrature(f, k as usize, (a, b)); let c = (a + b) / 2f64; - let tol_curr = if method.is_relative() { - tol * G - } else { - tol - }; + let tol_curr = if method.is_relative() { tol * G } else { tol }; if (G - K).abs() < tol_curr || a == b || max_iter == 0 { - if ! G.is_finite() { + if !G.is_finite() { return G; } I += G; @@ -255,11 +251,11 @@ where I } -pub fn kronrod_quadrature(f: F, n: usize, (a, b): (f64, f64)) -> f64 +pub fn kronrod_quadrature(f: F, n: usize, (a, b): (f64, f64)) -> f64 where F: Fn(f64) -> f64, { - (b - a) / 2f64 * unit_kronrod_quadrature(|x| f(x * (b-a) / 2f64 + (a + b) / 2f64), n) + (b - a) / 2f64 * unit_kronrod_quadrature(|x| f(x * (b - a) / 2f64 + (a + b) / 2f64), n) } // ============================================================================= @@ -382,9 +378,9 @@ fn unit_kronrod_quadrature(f: F, n: usize) -> f64 where F: Fn(f64) -> f64, { - let (a,x) = kronrod_table(n); + let (a, x) = kronrod_table(n); let mut s = 0f64; - for i in 0 .. a.len() { + for i in 0..a.len() { s += a[i] * f(x[i]); } s @@ -414,28 +410,28 @@ fn kronrod_table(n: usize) -> (Vec, Vec) { match n % 2 { 0 => { - for i in 0 .. ref_node.len() { + for i in 0..ref_node.len() { result_node[i] = ref_node[i]; result_weight[i] = ref_weight[i]; } - for i in ref_node.len() .. n { - result_node[i] = -ref_node[n-i-1]; - result_weight[i] = ref_weight[n-i-1]; + for i in ref_node.len()..n { + result_node[i] = -ref_node[n - i - 1]; + result_weight[i] = ref_weight[n - i - 1]; } } 1 => { - for i in 0 .. ref_node.len() { + for i in 0..ref_node.len() { result_node[i] = ref_node[i]; result_weight[i] = ref_weight[i]; } - for i in ref_node.len() .. 
n { - result_node[i] = -ref_node[n-i]; - result_weight[i] = ref_weight[n-i]; + for i in ref_node.len()..n { + result_node[i] = -ref_node[n - i]; + result_weight[i] = ref_weight[n - i]; } } - _ => unreachable!() + _ => unreachable!(), } (result_weight, result_node) } @@ -922,7 +918,7 @@ const LEGENDRE_WEIGHT_25: [f64; 13] = [ 0.054904695975835192, 0.040939156701306313, 0.026354986615032137, - 0.011393798501026288, + 0.011393798501026288, ]; const LEGENDRE_WEIGHT_26: [f64; 13] = [ 0.118321415279262277, diff --git a/src/prelude/simpler.rs b/src/prelude/simpler.rs index 819bc38f..c2c0c7d7 100644 --- a/src/prelude/simpler.rs +++ b/src/prelude/simpler.rs @@ -1,5 +1,3 @@ -#[cfg(feature="parquet")] -use std::error::Error; use crate::numerical::{ eigen, eigen::{Eigen, EigenMethod::Jacobi}, @@ -8,13 +6,15 @@ use crate::numerical::{ spline, spline::{CubicHermiteSpline, SlopeMethod::Quadratic}, }; +#[cfg(feature = "parquet")] +use crate::structure::dataframe::{DataFrame, WithParquet}; use crate::structure::matrix::{self, Matrix}; use crate::structure::polynomial; use crate::traits::math::{Norm, Normed}; -#[cfg(feature="parquet")] -use crate::structure::dataframe::{DataFrame, WithParquet}; -#[cfg(feature="parquet")] +#[cfg(feature = "parquet")] use arrow2::io::parquet::write::CompressionOptions; +#[cfg(feature = "parquet")] +use std::error::Error; /// Simple Norm pub trait SimpleNorm: Normed { @@ -35,7 +35,7 @@ pub trait SimplerLinearAlgebra { fn waz_diag(&self) -> Option; fn waz(&self) -> Option; fn qr(&self) -> matrix::QR; - #[cfg(feature="O3")] + #[cfg(feature = "O3")] fn cholesky(&self) -> Matrix; fn rref(&self) -> Matrix; fn det(&self) -> f64; @@ -99,7 +99,7 @@ impl SimplerLinearAlgebra for Matrix { matrix::LinearAlgebra::qr(self) } - #[cfg(feature="O3")] + #[cfg(feature = "O3")] fn cholesky(&self) -> Matrix { matrix::LinearAlgebra::cholesky(self, matrix::UPLO::Lower) } @@ -161,7 +161,7 @@ use crate::special::function::{ /// Returns [`NAN`](f64::NAN) if the given input is smaller than -1/e (≈ -0.36787944117144233). /// /// Accurate to 50 bits. -/// +/// /// Wrapper of `lambert_w_0` function of `lambert_w` crate. /// /// # Reference @@ -176,7 +176,7 @@ pub fn lambert_w0(z: f64) -> f64 { /// Returns [`NAN`](f64::NAN) if the given input is positive or smaller than -1/e (≈ -0.36787944117144233). /// /// Accurate to 50 bits. -/// +/// /// Wrapper of `lambert_w_m1` function of `lambert_w` crate. 
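
> Usage reference for the dispatch reformatted in `src/numerical/integral.rs` above — a minimal sketch assuming the module's public `integrate` entry point (the function containing the `match method` shown in the first hunk). Only `GaussLegendre` and `NewtonCotes` appear verbatim there; every other `Integral` variant falls through to the adaptive `gauss_kronrod_quadrature` routine.

```rust
use peroxide::numerical::integral::{integrate, Integral};

fn main() {
    // Integral of x^2 over [0, 1]; exact value is 1/3.
    let f = |x: f64| x.powi(2);

    // Fixed-order Gauss-Legendre rule, dispatched directly in the match above.
    let gl = integrate(f, (0f64, 1f64), Integral::GaussLegendre(16));

    // Any other Integral variant is routed to gauss_kronrod_quadrature(f, (a, b), method),
    // which bisects until the Gauss and Kronrod estimates agree within the method's tolerance.
    println!("Gauss-Legendre(16): {}", gl);
}
```
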
/// /// # Reference @@ -187,13 +187,13 @@ pub fn lambert_wm1(z: f64) -> f64 { } /// Simple handle parquet -#[cfg(feature="parquet")] +#[cfg(feature = "parquet")] pub trait SimpleParquet: Sized { fn write_parquet(&self, path: &str) -> Result<(), Box>; fn read_parquet(path: &str) -> Result>; } -#[cfg(feature="parquet")] +#[cfg(feature = "parquet")] impl SimpleParquet for DataFrame { fn write_parquet(&self, path: &str) -> Result<(), Box> { WithParquet::write_parquet(self, path, CompressionOptions::Uncompressed) diff --git a/src/statistics/dist.rs b/src/statistics/dist.rs index b409c197..ee150aca 100644 --- a/src/statistics/dist.rs +++ b/src/statistics/dist.rs @@ -234,15 +234,15 @@ use self::rand::distributions::uniform::SampleUniform; use self::rand::prelude::*; pub use self::OPDist::*; pub use self::TPDist::*; -use crate::traits::fp::FPVector; use crate::special::function::*; +use crate::traits::fp::FPVector; //use statistics::rand::ziggurat; +use self::WeightedUniformError::*; use crate::statistics::{ops::C, stat::Statistics}; use crate::util::non_macro::{linspace, seq}; use crate::util::useful::{auto_zip, find_interval}; +use anyhow::{bail, Result}; use std::f64::consts::E; -use self::WeightedUniformError::*; -use anyhow::{Result, bail}; /// One parameter distribution /// @@ -271,7 +271,7 @@ pub enum TPDist> { pub struct WeightedUniform> { weights: Vec, sum: T, - intervals: Vec<(T, T)> + intervals: Vec<(T, T)>, } #[derive(Debug, Clone, Copy)] @@ -286,7 +286,9 @@ impl std::fmt::Display for WeightedUniformError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { WeightedUniformError::AllZeroWeightError => write!(f, "all weights are zero"), - WeightedUniformError::LengthMismatchError => write!(f, "weights and intervals have different length"), + WeightedUniformError::LengthMismatchError => { + write!(f, "weights and intervals have different length") + } WeightedUniformError::NoNonZeroIntervalError => write!(f, "no non-zero interval found"), WeightedUniformError::EmptyWeightError => write!(f, "weights are empty"), } @@ -295,11 +297,11 @@ impl std::fmt::Display for WeightedUniformError { impl WeightedUniform { /// Create a new weighted uniform distribution - /// + /// /// # Examples /// ``` /// use peroxide::fuga::*; - /// + /// /// fn main() -> Result<(), Box> { /// let weights = vec![1f64, 3f64, 0f64, 2f64]; /// let intervals = vec![0f64, 1f64, 2f64, 4f64, 5f64]; @@ -336,32 +338,31 @@ impl WeightedUniform { } } - let sum = weights.iter() + let sum = weights + .iter() .zip(intervals.iter()) .fold(0f64, |acc, (w, (a, b))| acc + w * (b - a)); - Ok( - WeightedUniform { - weights, - sum, - intervals - } - ) + Ok(WeightedUniform { + weights, + sum, + intervals, + }) } /// Create WeightedUniform from max pooling - /// + /// /// # Examples /// ``` /// use peroxide::fuga::*; - /// + /// /// fn main() -> Result<(), Box> { /// let w = WeightedUniform::from_max_pool_1d(f, (-2f64, 3f64), 10, 1e-3)?; /// w.weights().print(); /// /// Ok(()) /// } - /// + /// /// fn f(x: f64) -> f64 { /// if x.abs() < 1f64 { /// 1f64 - x.abs() @@ -371,40 +372,39 @@ impl WeightedUniform { /// } /// ``` pub fn from_max_pool_1d(f: F, (a, b): (f64, f64), n: usize, eps: f64) -> Result - where F: Fn(f64) -> f64 + Copy { + where + F: Fn(f64) -> f64 + Copy, + { // Find non-zero intervals let mut a = a; let mut b = b; let trial = seq(a, b, eps); - for i in 0 .. 
trial.len() { + for i in 0..trial.len() { let x = trial[i]; if f(x) > 0f64 { - a = if i > 0 { trial[i-1] } else { x }; + a = if i > 0 { trial[i - 1] } else { x }; break; } } - for i in (0 .. trial.len()).rev() { + for i in (0..trial.len()).rev() { let x = trial[i]; if f(x) > 0f64 { - b = if i < trial.len() - 1 { trial[i+1] } else { x }; + b = if i < trial.len() - 1 { trial[i + 1] } else { x }; break; } } if a >= b { bail!(NoNonZeroIntervalError); } - let domain = linspace(a, b, n+1); + let domain = linspace(a, b, n + 1); // Find intervals let intervals = auto_zip(&domain); // Find weights - let weights: Vec = intervals.iter() - .map( - |(a, b)| { - seq(*a, *b+eps, eps).reduce(0f64, |acc, x| acc.max(f(x))) - } - ) + let weights: Vec = intervals + .iter() + .map(|(a, b)| seq(*a, *b + eps, eps).reduce(0f64, |acc, x| acc.max(f(x)))) .collect(); Self::new(weights, domain) @@ -419,11 +419,19 @@ impl WeightedUniform { } pub fn domain_linspace(&self, n: usize) -> Vec { - linspace(self.intervals[0].0, self.intervals[self.intervals.len()-1].1, n) + linspace( + self.intervals[0].0, + self.intervals[self.intervals.len() - 1].1, + n, + ) } pub fn domain_seq(&self, step: f64) -> Vec { - seq(self.intervals[0].0, self.intervals[self.intervals.len()-1].1, step) + seq( + self.intervals[0].0, + self.intervals[self.intervals.len() - 1].1, + step, + ) } pub fn sum(&self) -> f64 { @@ -433,15 +441,19 @@ impl WeightedUniform { pub fn update_weights(&mut self, weights: Vec) { assert_eq!(self.intervals.len(), weights.len()); self.weights = weights; - self.sum = self.weights.iter() + self.sum = self + .weights + .iter() .zip(self.intervals.iter()) .fold(0f64, |acc, (w, (a, b))| acc + w * (b - a)); } pub fn update_intervals(&mut self, intervals: Vec) { - assert_eq!(self.weights.len()+1, intervals.len()); + assert_eq!(self.weights.len() + 1, intervals.len()); self.intervals = auto_zip(&intervals); - self.sum = self.weights.iter() + self.sum = self + .weights + .iter() .zip(self.intervals.iter()) .fold(0f64, |acc, (w, (a, b))| acc + w * (b - a)); } @@ -493,7 +505,11 @@ impl> ParametricDist for Weight fn params(&self) -> Self::Parameter { let weights = self.weights.iter().map(|x| (*x).into()).collect(); - let intervals = self.intervals.iter().map(|(x, y)| ((*x).into(), (*y).into())).collect(); + let intervals = self + .intervals + .iter() + .map(|(x, y)| ((*x).into(), (*y).into())) + .collect(); (weights, intervals) } } @@ -619,11 +635,7 @@ impl> RNG for TPDist { Binomial(num, mu) => { let binom = rand_distr::Binomial::new(*num as u64, (*mu).into()).unwrap(); - binom - .sample_iter(rng) - .take(n) - .map(|t| t as f64) - .collect() + binom.sample_iter(rng).take(n).map(|t| t as f64).collect() } Normal(m, s) => { @@ -789,10 +801,12 @@ impl RNG for WeightedUniform { let ics: Vec = w.sample_iter(&mut rng_clip).take(n).collect(); *rng = rng_clip; - ics.into_iter().map(|idx| { - let (l, r) = self.intervals[idx]; - rng.gen_range(l ..=r) - }).collect::>() + ics.into_iter() + .map(|idx| { + let (l, r) = self.intervals[idx]; + rng.gen_range(l..=r) + }) + .collect::>() } fn pdf>(&self, x: S) -> f64 { @@ -812,11 +826,11 @@ impl RNG for WeightedUniform { return 1f64; } let idx = find_interval(self.intervals(), x); - self.weights[0 ..=idx].iter() - .zip(self.intervals[0 ..=idx].iter()) - .fold(0f64, |acc, (w, (a, b))| { - acc + w * (b - a) - }) / self.sum + self.weights[0..=idx] + .iter() + .zip(self.intervals[0..=idx].iter()) + .fold(0f64, |acc, (w, (a, b))| acc + w * (b - a)) + / self.sum } } @@ -912,16 +926,23 @@ impl Statistics 
for WeightedUniform { type Value = f64; fn mean(&self) -> Self::Value { - self.intervals().iter().zip(self.weights().iter()) - .map(|((l, r), w)| (r.powi(2)-l.powi(2))/2f64 * w) - .sum::() / self.sum + self.intervals() + .iter() + .zip(self.weights().iter()) + .map(|((l, r), w)| (r.powi(2) - l.powi(2)) / 2f64 * w) + .sum::() + / self.sum } fn var(&self) -> Self::Value { let mean = self.mean(); - self.intervals().iter().zip(self.weights().iter()) + self.intervals() + .iter() + .zip(self.weights().iter()) .map(|((l, r), w)| w * (r.powi(3) - l.powi(3)) / 3f64) - .sum::() / self.sum - mean * mean + .sum::() + / self.sum + - mean * mean } fn sd(&self) -> Self::Value { diff --git a/src/statistics/rand.rs b/src/statistics/rand.rs index 36590085..1e577fbe 100644 --- a/src/statistics/rand.rs +++ b/src/statistics/rand.rs @@ -24,9 +24,9 @@ extern crate rand; use self::rand::distributions::uniform::SampleUniform; use self::rand::prelude::*; +use crate::statistics::dist::{WeightedUniform, RNG}; #[allow(unused_imports)] use crate::structure::matrix::*; -use crate::statistics::dist::{RNG, WeightedUniform}; /// Small random number generator from seed /// @@ -466,7 +466,9 @@ pub fn ziggurat(rng: &mut ThreadRng, sigma: f64) -> f64 { /// Ok(()) /// } pub fn prs(f: F, n: usize, (a, b): (f64, f64), m: usize, eps: f64) -> anyhow::Result> -where F: Fn(f64) -> f64 + Copy { +where + F: Fn(f64) -> f64 + Copy, +{ let mut rng = thread_rng(); let mut result = vec![0f64; n]; @@ -482,7 +484,7 @@ where F: Fn(f64) -> f64 + Copy { if weight <= 0f64 { continue; } else { - let y = rng.gen_range(0f64 ..=weight); + let y = rng.gen_range(0f64..=weight); if y <= f(x) { result[n - left_num] = x; @@ -527,8 +529,17 @@ where F: Fn(f64) -> f64 + Copy { /// /// Ok(()) /// } -pub fn prs_with_rng(f: F, n: usize, (a, b): (f64, f64), m: usize, eps: f64, rng: &mut R) -> anyhow::Result> - where F: Fn(f64) -> f64 + Copy { +pub fn prs_with_rng( + f: F, + n: usize, + (a, b): (f64, f64), + m: usize, + eps: f64, + rng: &mut R, +) -> anyhow::Result> +where + F: Fn(f64) -> f64 + Copy, +{ let mut result = vec![0f64; n]; let w = WeightedUniform::from_max_pool_1d(f, (a, b), m, eps)?; @@ -542,7 +553,7 @@ pub fn prs_with_rng(f: F, n: usize, (a, b): (f64, f64), m: us if weight <= 0f64 { continue; } else { - let y = rng.gen_range(0f64 ..=weight); + let y = rng.gen_range(0f64..=weight); if y <= f(x) { result[n - left_num] = x; diff --git a/src/structure/matrix.rs b/src/structure/matrix.rs index 797792ca..226e7958 100644 --- a/src/structure/matrix.rs +++ b/src/structure/matrix.rs @@ -192,7 +192,7 @@ //! To write columns or rows, `DataFrame` and `nc` feature could be the best choice. //! //! * `nc` feature should be required - `netcdf` or `libnetcdf` are prerequisites. -//! +//! //! ```no_run //! #[macro_use] //! extern crate peroxide; @@ -596,10 +596,10 @@ //! } //! 
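
> For reference, the `Statistics` impl for `WeightedUniform` in `src/statistics/dist.rs` above evaluates the exact moments of the piecewise-constant density it represents. With weights $w_i$ on intervals $(l_i, r_i)$ and the normalizing sum $S$ maintained by `new` and `update_weights`:

$$ S = \sum_i w_i\,(r_i - l_i), \qquad \mathbb{E}[X] = \frac{1}{S} \sum_i w_i\, \frac{r_i^2 - l_i^2}{2}, \qquad \operatorname{Var}(X) = \frac{1}{S} \sum_i w_i\, \frac{r_i^3 - l_i^3}{3} - \mathbb{E}[X]^2 $$

which is exactly what `mean` and `var` accumulate term by term before dividing by `self.sum`.
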
``` -#[cfg(feature="csv")] +#[cfg(feature = "csv")] extern crate csv; -#[cfg(feature="csv")] +#[cfg(feature = "csv")] use self::csv::{ReaderBuilder, StringRecord, WriterBuilder}; #[cfg(feature = "O3")] @@ -609,30 +609,32 @@ extern crate lapack; #[cfg(feature = "O3")] use blas::{daxpy, dgemm, dgemv}; #[cfg(feature = "O3")] -use lapack::{dgecon, dgeqrf, dgetrf, dgetri, dgetrs, dorgqr, dgesvd, dpotrf}; +use lapack::{dgecon, dgeqrf, dgesvd, dgetrf, dgetri, dgetrs, dorgqr, dpotrf}; +use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; pub use self::Shape::{Col, Row}; use crate::numerical::eigen::{eigen, EigenMethod}; +use crate::structure::dataframe::{Series, TypedVector}; +use crate::traits::math::{ParallelInnerProduct, ParallelNormed}; +use crate::traits::sugar::ScalableMut; use crate::traits::{ - general::Algorithm, fp::{FPMatrix, FPVector}, + general::Algorithm, math::{InnerProduct, LinearOp, MatrixProduct, Norm, Normed, Vector}, mutable::MutMatrix, }; use crate::util::{ - low_level::{swap_vec_ptr, copy_vec_ptr}, + low_level::{copy_vec_ptr, swap_vec_ptr}, non_macro::{cbind, eye, rbind, zeros}, useful::{nearly_eq, tab}, }; -use crate::structure::dataframe::{Series, TypedVector}; +use peroxide_num::{ExpLogOps, Numeric, PowOps, TrigOps}; use std::cmp::{max, min}; pub use std::error::Error; use std::fmt; use std::ops::{Add, Div, Index, IndexMut, Mul, Neg, Sub}; -use crate::traits::sugar::ScalableMut; -use peroxide_num::{ExpLogOps, PowOps, TrigOps, Numeric}; pub type Perms = Vec<(usize, usize)>; @@ -1143,7 +1145,7 @@ impl Matrix { /// Ok(()) /// } /// ``` - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn write(&self, file_path: &str) -> Result<(), Box> { let mut wtr = WriterBuilder::new().from_path(file_path)?; let r = self.row; @@ -1174,7 +1176,7 @@ impl Matrix { /// Ok(()) /// } /// ``` - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn write_round(&self, file_path: &str, round: usize) -> Result<(), Box> { let mut wtr = WriterBuilder::new().from_path(file_path)?; let r = self.row; @@ -1190,7 +1192,7 @@ impl Matrix { Ok(()) } - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn write_with_header( &self, file_path: &str, @@ -1212,7 +1214,7 @@ impl Matrix { Ok(()) } - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn write_with_header_round( &self, file_path: &str, @@ -1260,7 +1262,7 @@ impl Matrix { /// Ok(()) /// } /// ``` - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn read(file_path: &str, header: bool, delimiter: char) -> Result> { let mut rdr = ReaderBuilder::new() .has_headers(header) @@ -1340,6 +1342,27 @@ impl Matrix { mat } + /// From index operations in parallel + pub fn par_from_index(f: F, size: (usize, usize)) -> Matrix + where + F: Fn(usize, usize) -> G + Copy + Send + Sync, + G: Into, + { + let row = size.0; + let col = size.1; + + let data = (0..row) + .into_par_iter() + .flat_map(|i| { + (0..col) + .into_par_iter() + .map(|j| f(i, j).into()) + .collect::>() + }) + .collect::>(); + matrix(data, row, col, Row) + } + /// Matrix to `Vec>` /// /// To send `Matrix` to `inline-python` @@ -1365,7 +1388,7 @@ impl Matrix { /// /// # Description /// Return below elements of matrix to new matrix - /// + /// /// $$ /// \begin{pmatrix} /// \\ddots & & & & \\\\ @@ -1380,7 +1403,7 @@ impl Matrix { /// ``` /// extern crate peroxide; /// use peroxide::fuga::*; - /// + /// /// fn main() { /// let a = ml_matrix("1 2 3;4 5 6;7 8 9"); /// let b = ml_matrix("5 6;8 9"); @@ 
-1392,8 +1415,8 @@ impl Matrix { let row = end.0 - start.0 + 1; let col = end.1 - start.1 + 1; let mut result = matrix(vec![0f64; row * col], row, col, self.shape); - for i in 0 .. row { - for j in 0 .. col { + for i in 0..row { + for j in 0..col { result[(i, j)] = self[(start.0 + i, start.1 + j)]; } } @@ -1404,7 +1427,7 @@ impl Matrix { /// /// # Description /// Substitute below elements of matrix - /// + /// /// $$ /// \begin{pmatrix} /// \\ddots & & & & \\\\ @@ -1414,12 +1437,12 @@ impl Matrix { /// & & & & \\ddots /// \end{pmatrix} /// $$ - /// + /// /// # Examples /// ``` /// extern crate peroxide; /// use peroxide::fuga::*; - /// + /// /// fn main() { /// let mut a = ml_matrix("1 2 3;4 5 6;7 8 9"); /// let b = ml_matrix("1 2;3 4"); @@ -1431,8 +1454,8 @@ impl Matrix { pub fn subs_mat(&mut self, start: (usize, usize), end: (usize, usize), m: &Matrix) { let row = end.0 - start.0 + 1; let col = end.1 - start.1 + 1; - for i in 0 .. row { - for j in 0 .. col { + for i in 0..row { + for j in 0..col { self[(start.0 + i, start.1 + j)] = m[(i, j)]; } } @@ -1616,6 +1639,71 @@ impl Normed for Matrix { } } +impl ParallelNormed for Matrix { + type UnsignedScalar = f64; + + fn par_norm(&self, kind: Norm) -> f64 { + match kind { + Norm::F => { + let mut s = 0f64; + for i in 0..self.data.len() { + s += self.data[i].powi(2); + } + s.sqrt() + } + Norm::Lpq(p, q) => { + let s = (0..self.col) + .into_par_iter() + .map(|j| { + let s_row = (0..self.row) + .into_iter() + .map(|i| self[(i, j)].powi(p as i32)) + .sum::(); + s_row.powf(q / p) + }) + .sum::(); + s.powf(1f64 / q) + } + Norm::L1 => { + let mut m = f64::MIN; + match self.shape { + Row => self.change_shape().norm(Norm::L1), + Col => { + for c in 0..self.col { + let s = self.col(c).par_iter().sum(); + if s > m { + m = s; + } + } + m + } + } + } + Norm::LInf => { + let mut m = f64::MIN; + match self.shape { + Col => self.change_shape().norm(Norm::LInf), + Row => { + for r in 0..self.row { + let s = self.row(r).iter().sum(); + if s > m { + m = s; + } + } + m + } + } + } + Norm::L2 => { + let a = &self.t() * self; + let eig = eigen(&a, EigenMethod::Jacobi); + eig.eigenvalue.norm(Norm::LInf) + } + Norm::Lp(_) => unimplemented!(), + } + } +} + /// Frobenius inner product impl InnerProduct for Matrix { fn dot(&self, rhs: &Self) -> f64 { @@ -1627,6 +1715,17 @@ impl InnerProduct for Matrix { } } +/// Frobenius inner product in parallel +impl ParallelInnerProduct for Matrix { + fn par_dot(&self, rhs: &Self) -> f64 { + if self.shape == rhs.shape { + self.data.par_dot(&rhs.data) + } else { + self.data.par_dot(&rhs.change_shape().data) + } + } +} + /// TODO: Transpose /// Matrix as Linear operator for Vector @@ -2918,7 +3017,7 @@ impl SVD { pub fn s_mat(&self) -> Matrix { let mut mat = zeros(self.u.col, self.vt.row); - for i in 0 .. 
mat.row.min(mat.col) { + for i in 0..mat.row.min(mat.col) { mat[(i, i)] = self.s[i]; } mat @@ -2961,11 +3060,7 @@ impl SVD { } } - SVD { - s, - u, - vt, - } + SVD { s, u, vt } } } @@ -3151,7 +3246,7 @@ impl LinearAlgebra for Matrix { #[allow(non_snake_case)] fn qr(&self) -> QR { match () { - #[cfg(feature="O3")] + #[cfg(feature = "O3")] () => { let opt_dgeqrf = lapack_dgeqrf(self); match opt_dgeqrf { @@ -3159,10 +3254,7 @@ impl LinearAlgebra for Matrix { Some(dgeqrf) => { let q = dgeqrf.get_Q(); let r = dgeqrf.get_R(); - QR { - q, - r - } + QR { q, r } } } } @@ -3209,7 +3301,7 @@ impl LinearAlgebra for Matrix { /// ``` fn svd(&self) -> SVD { match () { - #[cfg(feature="O3")] + #[cfg(feature = "O3")] () => { let opt_dgesvd = lapack_dgesvd(self); match opt_dgesvd { @@ -3218,14 +3310,12 @@ impl LinearAlgebra for Matrix { SVD_STATUS::Diverge(i) => { panic!("Divergent occurs in SVD - {} iterations", i) } - SVD_STATUS::Success => { - SVD { - s: dgesvd.s, - u: dgesvd.u, - vt: dgesvd.vt, - } - } - } + SVD_STATUS::Success => SVD { + s: dgesvd.s, + u: dgesvd.u, + vt: dgesvd.vt, + }, + }, } } _ => { @@ -3465,7 +3555,9 @@ impl LinearAlgebra for Matrix { None => panic!("Singular matrix (opt_dgrf)"), Some(dgrf) => match dgrf.status { LAPACK_STATUS::NonSingular => lapack_dgetri(&dgrf).unwrap(), - LAPACK_STATUS::Singular => panic!("Singular matrix (LAPACK_STATUS Singular)"), + LAPACK_STATUS::Singular => { + panic!("Singular matrix (LAPACK_STATUS Singular)") + } }, } } @@ -3500,13 +3592,13 @@ impl LinearAlgebra for Matrix { /// ``` fn pseudo_inv(&self) -> Self { match () { - #[cfg(feature="O3")] + #[cfg(feature = "O3")] () => { let svd = self.svd(); let row = svd.vt.row; let col = svd.u.row; let mut sp = zeros(row, col); - for i in 0 .. row.min(col) { + for i in 0..row.min(col) { sp[(i, i)] = 1f64 / svd.s[i]; } svd.vt.t() * sp * svd.u.t() @@ -3620,9 +3712,9 @@ impl LinearAlgebra for Matrix { return false; } - for i in 0 .. self.row { - for j in i .. 
self.col { - if !nearly_eq(self[(i,j)], self[(j,i)]) { + for i in 0..self.row { + for j in i..self.col { + if !nearly_eq(self[(i, j)], self[(j, i)]) { return false; } } @@ -3722,7 +3814,6 @@ impl Div for Matrix { } } - impl ExpLogOps for Matrix { type Float = f64; fn exp(&self) -> Self { @@ -4248,7 +4339,7 @@ pub enum POSITIVE_STATUS { #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum UPLO { Upper, - Lower + Lower, } /// Temporary data structure from `dgetrf` @@ -4279,7 +4370,7 @@ pub struct DGESVD { pub struct DPOTRF { pub fact_mat: Matrix, pub uplo: UPLO, - pub status: POSITIVE_STATUS + pub status: POSITIVE_STATUS, } ///// Temporary data structure from `dgeev` @@ -4473,7 +4564,22 @@ pub fn lapack_dgesvd(mat: &Matrix) -> Option { // Workspace query unsafe { - dgesvd(jobu, jobvt, m, n, &mut A.data, lda, &mut s, &mut u, ldu, &mut vt, ldvt, &mut work, lwork, &mut info); + dgesvd( + jobu, + jobvt, + m, + n, + &mut A.data, + lda, + &mut s, + &mut u, + ldu, + &mut vt, + ldvt, + &mut work, + lwork, + &mut info, + ); } let optimal_lwork = work[0] as usize; @@ -4532,35 +4638,23 @@ pub fn lapack_dpotrf(mat: &Matrix, UPLO: UPLO) -> Option { let mut info = 0i32; let uplo = match UPLO { UPLO::Upper => b'U', - UPLO::Lower => b'L' + UPLO::Lower => b'L', }; - unsafe { - dpotrf( - uplo, - N, - &mut A.data, - lda, - &mut info, - ) - } + unsafe { dpotrf(uplo, N, &mut A.data, lda, &mut info) } if info == 0 { - Some( - DPOTRF { - fact_mat: matrix(A.data, mat.row, mat.col, Col), - uplo: UPLO, - status: POSITIVE_STATUS::Success - } - ) + Some(DPOTRF { + fact_mat: matrix(A.data, mat.row, mat.col, Col), + uplo: UPLO, + status: POSITIVE_STATUS::Success, + }) } else if info > 0 { - Some( - DPOTRF { - fact_mat: matrix(A.data, mat.row, mat.col, Col), - uplo: UPLO, - status: POSITIVE_STATUS::Failed(info) - } - ) + Some(DPOTRF { + fact_mat: matrix(A.data, mat.row, mat.col, Col), + uplo: UPLO, + status: POSITIVE_STATUS::Failed(info), + }) } else { None } @@ -4674,8 +4768,8 @@ impl DPOTRF { let mat = &self.fact_mat; let n = mat.col; let mut result = matrix(vec![0f64; n.pow(2)], n, n, mat.shape); - for i in 0 .. n { - for j in i .. n { + for i in 0..n { + for j in i..n { result[(i, j)] = mat[(i, j)]; } } @@ -4691,8 +4785,8 @@ impl DPOTRF { let n = mat.col; let mut result = matrix(vec![0f64; n.pow(2)], n, n, mat.shape); - for i in 0 .. n { - for j in 0 .. 
i+1 { + for i in 0..n { + for j in 0..i + 1 { result[(i, j)] = mat[(i, j)]; } } diff --git a/src/structure/vector.rs b/src/structure/vector.rs index 0e977361..8d584942 100644 --- a/src/structure/vector.rs +++ b/src/structure/vector.rs @@ -267,7 +267,12 @@ extern crate blas; #[cfg(feature = "O3")] use blas::{daxpy, ddot, dnrm2, idamax}; +use crate::rayon::prelude::*; + use crate::structure::matrix::{matrix, Matrix, Row}; +use crate::traits::fp::ParallelFPVector; +use crate::traits::math::{ParallelInnerProduct, ParallelNormed, ParallelVectorProduct}; +use crate::traits::mutable::ParallelMutFP; use crate::traits::{ fp::FPVector, general::Algorithm, @@ -318,7 +323,7 @@ impl FPVector for Vec { fn reduce(&self, init: T, f: F) -> f64 where F: Fn(f64, f64) -> f64, - T: Into, + T: Into + Copy, { self.iter().fold(init.into(), |x, &y| f(x, y)) } @@ -409,6 +414,93 @@ impl FPVector for Vec { } } +impl ParallelFPVector for Vec { + type Scalar = f64; + + /// par_fmap for `Vec` + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// use peroxide::traits::fp::ParallelFPVector; + /// + /// fn main() { + /// let a = c!(1,2,3,4,5); + /// assert_eq!(a.par_fmap(|x| x*2f64), seq!(2,10,2)); + /// } + /// ``` + fn par_fmap(&self, f: F) -> Vec + where + F: Fn(f64) -> f64 + Sync + Send, + { + let mut v = self.clone(); + v.par_iter_mut().for_each(|x| *x = f(*x)); + v + } + + /// reduce for `Vec` + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// use peroxide::traits::fp::ParallelFPVector; + /// + /// fn main() { + /// let a = seq!(1,100,1); + /// assert_eq!(a.par_reduce(0, |x,y| x + y), 5050f64); + /// } + /// ``` + fn par_reduce(&self, _init: T, f: F) -> f64 + where + F: Fn(f64, f64) -> f64 + Send + Sync, + T: Into + Send + Sync + Copy, + { + self.par_iter() + .cloned() + .reduce_with(|x, y| f(x, y)) + .expect("Unable to perform parallel reduce operation") + } + + fn par_zip_with(&self, f: F, other: &Vec) -> Vec + where + F: Fn(f64, f64) -> f64 + Send + Sync, + { + self.par_iter() + .zip(other) + .map(|(x, y)| f(*x, *y)) + .collect::>() + } + + /// Filter for `Vec` + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// use peroxide::traits::fp::ParallelFPVector; + /// + /// fn main() { + /// let a = c!(1,2,3,4,5); + /// let b = a.par_filter(|x| x > 3.); + /// assert_eq!(b, c!(4,5)); + /// } + /// ``` + fn par_filter(&self, f: F) -> Vec + where + F: Fn(f64) -> bool + Send + Sync, + { + self.clone() + .into_par_iter() + .filter(|x| f(*x)) + .collect::>() + } +} + /// Explicit version of `map` pub fn map(f: F, xs: &[T]) -> Vec where @@ -423,6 +515,17 @@ where result } +/// Explicit version of `map` in parallel +pub fn par_map(f: F, xs: &[T]) -> Vec +where + F: Fn(T) -> T + Send + Sync, + T: Default + Copy + Send + Sync, +{ + let mut result = vec![T::default(); xs.len()]; + result.par_iter_mut().for_each(|x| *x = f(*x)); + result +} + /// Explicit version of `reduce` pub fn reduce(f: F, init: T, xs: &[T]) -> T where @@ -434,6 +537,7 @@ where s = f(s, x); } s + // Parallel version: !unimplemented() } /// Explicit version of `zip_with` @@ -450,6 +554,18 @@ where result } +/// Explicit version of `zip_with` in parallel +pub fn par_zip_with(f: F, xs: &[T], ys: &[T]) -> Vec +where + F: Fn(T, T) -> T + Send + Sync, + T: Default + Copy + Send + Sync, +{ + xs.par_iter() + .zip(ys.into_par_iter()) + .map(|(x, y)| f(*x, *y)) + .collect::>() +} + 
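
> Note on the explicit `par_map` helper added in `src/structure/vector.rs` above: as written it fills `result` with `T::default()` and then maps `f` over those defaults, so the input slice `xs` is never read. A minimal sketch of a version that reads from `xs` (the intended behavior is an assumption on my part, mirroring the serial `map` directly above it):

```rust
use rayon::prelude::*;

/// Explicit version of `map` in parallel (sketch: reads from `xs`).
pub fn par_map<F, T>(f: F, xs: &[T]) -> Vec<T>
where
    F: Fn(T) -> T + Send + Sync,
    // `Default` is kept only to match the original signature; it is unused here.
    T: Default + Copy + Send + Sync,
{
    // Map each input element in parallel, preserving order.
    xs.par_iter().map(|&x| f(x)).collect()
}
```
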
impl MutFP for Vec { type Scalar = f64; @@ -472,6 +588,26 @@ impl MutFP for Vec { } } +impl ParallelMutFP for Vec { + type Scalar = f64; + + fn par_mut_map(&mut self, f: F) + where + F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync, + { + self.par_iter_mut().for_each(|x| *x = f(*x)); + } + + fn par_mut_zip_with(&mut self, f: F, other: &Self) + where + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync, + { + self.par_iter_mut() + .zip(other.into_par_iter()) + .for_each(|(x, y)| *x = f(*x, *y)); + } +} + impl Algorithm for Vec { /// Assign rank /// @@ -693,6 +829,51 @@ impl Normed for Vec { } } +impl ParallelNormed for Vec { + type UnsignedScalar = f64; + + fn par_norm(&self, kind: Norm) -> f64 { + match kind { + Norm::L1 => self.iter().map(|x| x.abs()).sum(), + Norm::L2 => { + #[cfg(feature = "O3")] + { + let n_i32 = self.len() as i32; + let res: f64; + unsafe { + res = dnrm2(n_i32, self, 1); + } + res + } + #[cfg(not(feature = "O3"))] + { + self.par_iter() + .map(|x| x.powi(2)) + .sum::() + .sqrt() + } + } + Norm::Lp(p) => { + assert!( + p >= 1f64, + "lp norm is only defined for p>=1, the given value was p={}", + p + ); + self.par_iter() + .map(|x| x.powf(p)) + .sum::() + .powf(1_f64 / p) + } + Norm::LInf => self + .par_iter() + .fold(|| 0f64, |x, y| x.max(y.abs())) + .reduce(|| 0f64, |acc1, acc2| acc1.max(acc2.abs())), + Norm::F => unimplemented!(), + Norm::Lpq(_, _) => unimplemented!(), + } + } +} + impl InnerProduct for Vec { fn dot(&self, rhs: &Self) -> f64 { #[cfg(feature = "O3")] @@ -713,6 +894,27 @@ impl InnerProduct for Vec { } } +impl ParallelInnerProduct for Vec { + fn par_dot(&self, rhs: &Self) -> f64 { + #[cfg(feature = "O3")] + { + let n_i32 = self.len() as i32; + let res: f64; + unsafe { + res = ddot(n_i32, self, 1, rhs, 1); + } + res + } + #[cfg(not(feature = "O3"))] + { + self.par_iter() + .zip(rhs.into_par_iter()) + .fold(|| 0_f64, |x, (y1, y2)| x + y1 * y2) + .sum::() + } + } +} + impl LinearOp, f64> for Vec { fn apply(&self, rhs: &Vec) -> f64 { self.dot(rhs) @@ -729,18 +931,22 @@ impl VectorProduct for Vec { fn cross(&self, other: &Self) -> Self { assert_eq!(self.len(), other.len()); // 2D cross product is ill-defined - if self.len() == 2 { - let mut v = vec![0f64; 1]; - v[0] = self[0] * other[1] - self[1] * other[0]; - v - } else if self.len() == 3 { - let mut v = vec![0f64; 3]; - v[0] = self[1] * other[2] - self[2] * other[1]; - v[1] = self[2] * other[0] - self[0] * other[2]; - v[2] = self[0] * other[1] - self[1] * other[0]; - v - } else { - panic!("Cross product can be defined only in 2 or 3 dimension") + match self.len() { + 2 => { + let mut v = vec![0f64; 1]; + v[0] = self[0] * other[1] - self[1] * other[0]; + v + } + 3 => { + let mut v = vec![0f64; 3]; + v[0] = self[1] * other[2] - self[2] * other[1]; + v[1] = self[2] * other[0] - self[0] * other[2]; + v[2] = self[0] * other[1] - self[1] * other[0]; + v + } + _ => { + panic!("Cross product can be defined only in 2 or 3 dimension") + } } } @@ -751,6 +957,33 @@ impl VectorProduct for Vec { } } +impl ParallelVectorProduct for Vec { + fn par_cross(&self, other: &Self) -> Self { + assert_eq!(self.len(), other.len()); + // 2D cross product is ill-defined + match self.len() { + 2 => { + let mut v = vec![0f64; 1]; + v[0] = self[0] * other[1] - self[1] * other[0]; + v + } + 3 => { + let v = (0..3) + .into_par_iter() + .map(|index| { + self[(index + 1) % 3] * other[(index + 2) % 3] + - self[(index + 2) % 3] * other[(index + 1) % 3] + }) + .collect::>(); + v + } + _ => { + panic!("Cross product can be defined 
only in 2 or 3 dimension") + } + } + } +} + // /// Convenient Vec Operations (No Clone, No Copy) // impl VecOps for Vec { // fn s_add(&self, scala: f64) -> Self { diff --git a/src/traits/fp.rs b/src/traits/fp.rs index 41f11463..07782cb0 100644 --- a/src/traits/fp.rs +++ b/src/traits/fp.rs @@ -10,7 +10,7 @@ pub trait FPVector { fn reduce(&self, init: T, f: F) -> Self::Scalar where F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar, - T: Into; + T: Into + Copy; fn zip_with(&self, f: F, other: &Self) -> Self where F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar; @@ -47,7 +47,7 @@ pub trait FPMatrix { fn reduce(&self, init: T, f: F) -> f64 where F: Fn(f64, f64) -> f64, - T: Into; + T: Into + Copy + Clone; fn zip_with(&self, f: F, other: &Matrix) -> Matrix where F: Fn(f64, f64) -> f64; @@ -58,3 +58,42 @@ pub trait FPMatrix { where F: Fn(Vec) -> f64; } + +/// Functional Programming tools for Vector in Parallel (Uses Rayon crate) +pub trait ParallelFPVector { + type Scalar; + + fn par_fmap(&self, f: F) -> Self + where + F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync; + fn par_reduce(&self, init: T, f: F) -> Self::Scalar + where + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync, + T: Into + Send + Sync + Copy; + fn par_zip_with(&self, f: F, other: &Self) -> Self + where + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync; + fn par_filter(&self, f: F) -> Self + where + F: Fn(Self::Scalar) -> bool + Send + Sync; +} + +/// Functional Programming for Matrix in Parallel (Uses Rayon crate) +pub trait ParallelFPMatrix { + fn par_fmap(&self, f: F) -> Matrix + where + F: Fn(f64) -> f64 + Send + Sync; + fn par_reduce(&self, init: T, f: F) -> f64 + where + F: Fn(f64, f64) -> f64 + Send + Sync, + T: Into + Copy + Clone + Send + Sync; + fn par_zip_with(&self, f: F, other: &Matrix) -> Matrix + where + F: Fn(f64, f64) -> f64 + Send + Sync; + fn par_col_reduce(&self, f: F) -> Vec + where + F: Fn(Vec) -> f64 + Send + Sync; + fn par_row_reduce(&self, f: F) -> Vec + where + F: Fn(Vec) -> f64 + Send + Sync; +} diff --git a/src/traits/math.rs b/src/traits/math.rs index 228aad9e..2100fd1b 100644 --- a/src/traits/math.rs +++ b/src/traits/math.rs @@ -97,3 +97,36 @@ impl Normed for f64 { self / self.abs() } } + +// ============================================================================= +// Implementation for parallel traits +// ============================================================================= + +/// Mathematical Vector in Parallel +pub trait ParallelVector { + type Scalar; + fn par_add_vec(&self, rhs: &Self) -> Self; + fn par_sub_vec(&self, rhs: &Self) -> Self; + fn par_mul_scalar(&self, rhs: Self::Scalar) -> Self; +} + +/// Normed Vector in Parallel +pub trait ParallelNormed: Vector { + type UnsignedScalar; + fn par_norm(&self, kind: Norm) -> Self::UnsignedScalar; +} + +/// Inner product Vector in Parallel +pub trait ParallelInnerProduct: ParallelNormed { + fn par_dot(&self, rhs: &Self) -> Self::Scalar; +} + +/// Matrix Products in Parallel +pub trait ParallelMatrixProduct { + fn par_hadamard(&self, other: &Self) -> Matrix; +} + +/// Vector Products in Parallel +pub trait ParallelVectorProduct: Vector { + fn par_cross(&self, other: &Self) -> Self; +} diff --git a/src/traits/mod.rs b/src/traits/mod.rs index 92b224a6..2709f2db 100644 --- a/src/traits/mod.rs +++ b/src/traits/mod.rs @@ -1,3 +1,4 @@ +pub mod float; pub mod fp; pub mod general; pub mod math; @@ -6,4 +7,3 @@ pub mod num; pub mod pointer; pub mod stable; pub mod sugar; -pub mod float; diff --git 
a/src/traits/mutable.rs b/src/traits/mutable.rs index 7b0932fc..fccb56ce 100644 --- a/src/traits/mutable.rs +++ b/src/traits/mutable.rs @@ -16,3 +16,14 @@ pub trait MutMatrix { unsafe fn swap(&mut self, idx1: usize, idx2: usize, shape: Shape); unsafe fn swap_with_perm(&mut self, p: &Vec<(usize, usize)>, shape: Shape); } + +// Mutable trait for Vector in Parallel (Uses Rayon crate) +pub trait ParallelMutFP { + type Scalar; + fn par_mut_map(&mut self, f: F) + where + F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync; + fn par_mut_zip_with(&mut self, f: F, other: &Self) + where + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync; +} diff --git a/src/traits/sugar.rs b/src/traits/sugar.rs index a51d9269..e1ce7f1e 100644 --- a/src/traits/sugar.rs +++ b/src/traits/sugar.rs @@ -1,15 +1,17 @@ -use crate::structure::matrix::{Matrix, Shape, matrix}; +use crate::structure::matrix::{matrix, Matrix, Shape}; use crate::traits::fp::FPVector; use crate::util::non_macro::zeros_shape; -use std::ops::{Add, Sub, Mul, Div}; +use std::ops::{Add, Div, Mul, Sub}; /// Syntactic sugar for Vector operations -pub trait VecOps: Sized + FPVector -where Self::Scalar: Copy + Clone - + Add - + Sub - + Mul - + Div +pub trait VecOps: Sized + FPVector +where + Self::Scalar: Copy + + Clone + + Add + + Sub + + Mul + + Div, { //type Scalar; fn add_v(&self, v: &Self) -> Self { @@ -449,7 +451,7 @@ impl ConvToMat for Vec { fn to_col(&self) -> Matrix { matrix(self.clone(), self.len(), 1, Shape::Col) } - + fn to_row(&self) -> Matrix { matrix(self.clone(), 1, self.len(), Shape::Row) }
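
> Taken together, a small end-to-end usage sketch of the parallel APIs this patch introduces (`par_norm`, `par_dot`, `par_fmap`, `par_mut_map`, and `Matrix::par_from_index`). The trait paths follow the modules touched above and are imported explicitly, on the assumption that they are not re-exported through `fuga`; the vector lengths simply mirror the benchmark sizes.

```rust
use peroxide::fuga::*;
use peroxide::traits::fp::ParallelFPVector;
use peroxide::traits::math::{ParallelInnerProduct, ParallelNormed};
use peroxide::traits::mutable::ParallelMutFP;

fn main() {
    // Vectors
    let v: Vec<f64> = (0..1_000_000).map(|i| i as f64).collect();
    let w: Vec<f64> = (0..1_000_000).map(|i| 2.0 * i as f64).collect();

    let l2 = v.par_norm(Norm::L2); // parallel L2 norm
    let ip = v.par_dot(&w); // parallel inner product
    let doubled = v.par_fmap(|x| 2.0 * x); // parallel map

    let mut u = w.clone();
    u.par_mut_map(|x| x.sqrt()); // in-place parallel map

    // Matrices
    let m = Matrix::par_from_index(|i, j| (i + j) as f64, (1000, 1000));
    let lpq = m.par_norm(Norm::Lpq(4.0, 2.0)); // parallel Lpq norm
    let frob = m.par_dot(&m); // parallel Frobenius inner product

    println!("{} {} {} {} {} {}", l2, ip, doubled.len(), u.len(), lpq, frob);
}
```

As the benchmark results above suggest, the parallel variants only pay off for heavier per-element work (e.g. `Lpq` norms); for cheap element-wise operations on this matrix size, the serial paths remain faster.
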