diff --git a/Cargo.toml b/Cargo.toml index 89ef53a9..0c8f0cc7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,6 +18,7 @@ maintenance = { status = "actively-developed" } [dev-dependencies] float-cmp = "0.9" +criterion = { version = "0.5.1", features = ["html_reports"] } [dependencies] csv = { version = "1.3", optional = true, default-features = false } @@ -40,6 +41,7 @@ json = { version = "0.12", optional = true } arrow2 = { version = "0.18", features = ["io_parquet", "io_parquet_compression"], optional = true } num-complex = { version = "0.4", optional = true } lambert_w = { version = "0.3.0", default-features = false, features = ["24bits", "50bits"] } +rayon = "1.10" [package.metadata.docs.rs] rustdoc-args = [ "--html-in-header", "katex-header.html", "--cfg", "docsrs"] @@ -51,3 +53,8 @@ plot = ["pyo3"] nc = ["netcdf"] parquet = ["arrow2"] complex = ["num-complex", "matrixmultiply/cgemm"] + +[[bench]] +path = "benches/parallel_rayon/matrix_benchmark.rs" +name = "matrix_benchmark" +harness = false diff --git a/benches/data/rayon_matrix_benchmark_results.md b/benches/data/rayon_matrix_benchmark_results.md new file mode 100644 index 00000000..f85feae8 --- /dev/null +++ b/benches/data/rayon_matrix_benchmark_results.md @@ -0,0 +1,194 @@ +Lib used for benchmarking: Criterion +Matrix size: 1000x1000 + +Running benches/parallel_rayon/matrix_benchmark.rs + +ser_matrix_bench time: [535.12 µs 544.51 µs 556.68 µs] +Found 11 outliers among 100 measurements (11.00%) + 4 (4.00%) high mild + 7 (7.00%) high severe + +par_matrix_bench time: [5.0912 ms 5.1431 ms 5.1995 ms] +Found 7 outliers among 100 measurements (7.00%) + 1 (1.00%) low mild + 5 (5.00%) high mild + 1 (1.00%) high severe + +ser_py_matrix_bench time: [4.3100 ms 4.3309 ms 4.3544 ms] +Found 7 outliers among 100 measurements (7.00%) + 2 (2.00%) high mild + 5 (5.00%) high severe + +par_py_matrix_bench time: [11.667 ms 11.789 ms 11.920 ms] +Found 10 outliers among 100 measurements (10.00%) + 6 (6.00%) high mild + 4 (4.00%) high severe + +ser_matrix_change_shape_bench + time: [7.3630 ms 7.4075 ms 7.4608 ms] +Found 5 outliers among 100 measurements (5.00%) + 1 (1.00%) high mild + 4 (4.00%) high severe + +par_matrix_change_shape_bench + time: [10.276 ms 10.385 ms 10.499 ms] +Found 3 outliers among 100 measurements (3.00%) + 2 (2.00%) high mild + 1 (1.00%) high severe + +ser_matrix_extract_row_bench + time: [613.39 µs 622.44 µs 633.72 µs] +Found 7 outliers among 100 measurements (7.00%) + 7 (7.00%) high severe + +par_matrix_extract_row_bench + time: [5.4321 ms 5.4851 ms 5.5399 ms] +Found 4 outliers among 100 measurements (4.00%) + 4 (4.00%) high mild + +ser_matrix_from_index_bench + time: [2.4174 ms 2.4490 ms 2.4851 ms] +Found 14 outliers among 100 measurements (14.00%) + 1 (1.00%) high mild + 13 (13.00%) high severe + +par_matrix_from_index_bench + time: [2.3912 ms 2.4090 ms 2.4304 ms] +Found 9 outliers among 100 measurements (9.00%) + 2 (2.00%) high mild + 7 (7.00%) high severe + +ser_matrix_to_vec_bench time: [2.4800 ms 2.5082 ms 2.5423 ms] +Found 10 outliers among 100 measurements (10.00%) + 4 (4.00%) high mild + 6 (6.00%) high severe + +par_matrix_to_vec_bench time: [6.4041 ms 6.4618 ms 6.5250 ms] +Found 6 outliers among 100 measurements (6.00%) + 5 (5.00%) high mild + 1 (1.00%) high severe + +ser_matrix_to_diag_bench + time: [2.4335 ms 2.4526 ms 2.4750 ms] +Found 14 outliers among 100 measurements (14.00%) + 6 (6.00%) high mild + 8 (8.00%) high severe + +par_matrix_to_diag_bench + time: [13.514 ms 13.684 ms 13.868 ms] +Found 10 outliers among 100 
measurements (10.00%) + 7 (7.00%) high mild + 3 (3.00%) high severe + +Benchmarking ser_matrix_submat_bench: Warming up for 3.0000 s +Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 8.3s, enable flat sampling, or reduce sample count to 50. +ser_matrix_submat_bench time: [1.6077 ms 1.6243 ms 1.6451 ms] +Found 16 outliers among 100 measurements (16.00%) + 3 (3.00%) high mild + 13 (13.00%) high severe + +par_matrix_submat_bench time: [10.611 ms 10.761 ms 10.942 ms] +Found 5 outliers among 100 measurements (5.00%) + 3 (3.00%) high mild + 2 (2.00%) high severe + +ser_matrix_add_vec_bench + time: [7.3077 ms 7.3485 ms 7.3946 ms] +Found 12 outliers among 100 measurements (12.00%) + 2 (2.00%) high mild + 10 (10.00%) high severe + +par_matrix_add_vec_bench + time: [11.331 ms 11.480 ms 11.636 ms] +Found 2 outliers among 100 measurements (2.00%) + 2 (2.00%) high mild + +ser_matrix_norm_bench time: [5.1600 ms 5.1864 ms 5.2165 ms] +Found 7 outliers among 100 measurements (7.00%) + 1 (1.00%) high mild + 6 (6.00%) high severe + +par_matrix_norm_bench time: [2.6565 ms 2.6810 ms 2.7091 ms] +Found 5 outliers among 100 measurements (5.00%) + 2 (2.00%) high mild + 3 (3.00%) high severe + +Benchmarking ser_matrix_norm_bench #2: Warming up for 3.0000 s +Warning: Unable to complete 100 samples in 5.0s. You may wish to increase target time to 8.9s, enable flat sampling, or reduce sample count to 50. +ser_matrix_norm_bench #2 + time: [1.7262 ms 1.7391 ms 1.7541 ms] +Found 15 outliers among 100 measurements (15.00%) + 10 (10.00%) high mild + 5 (5.00%) high severe + +par_matrix_norm_bench #2 + time: [6.7071 ms 6.7883 ms 6.8703 ms] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +ser_matrix_norm_bench #3 + time: [9.7582 ms 9.9006 ms 10.057 ms] +Found 12 outliers among 100 measurements (12.00%) + 5 (5.00%) high mild + 7 (7.00%) high severe + +par_matrix_norm_bench #3 + time: [9.3004 ms 9.4088 ms 9.5239 ms] +Found 1 outliers among 100 measurements (1.00%) + 1 (1.00%) high mild + +ser_matrix_inner_prod_bench + time: [5.2730 ms 5.3590 ms 5.4583 ms] +Found 14 outliers among 100 measurements (14.00%) + 3 (3.00%) high mild + 11 (11.00%) high severe + +par_matrix_inner_prod_bench + time: [5.0987 ms 5.1644 ms 5.2402 ms] +Found 7 outliers among 100 measurements (7.00%) + 3 (3.00%) high mild + 4 (4.00%) high severe + +ser_matrix_hadamard_bench + time: [5.6521 ms 5.6870 ms 5.7262 ms] +Found 12 outliers among 100 measurements (12.00%) + 3 (3.00%) high mild + 9 (9.00%) high severe + +par_matrix_hadamard_bench + time: [14.155 ms 14.335 ms 14.527 ms] +Found 4 outliers among 100 measurements (4.00%) + 3 (3.00%) high mild + 1 (1.00%) high severe + +ser_matrix_take_row_bench + time: [3.7894 ms 3.8234 ms 3.8613 ms] +Found 15 outliers among 100 measurements (15.00%) + 7 (7.00%) high mild + 8 (8.00%) high severe + +par_matrix_take_row_bench + time: [8.4008 ms 8.5171 ms 8.6523 ms] +Found 9 outliers among 100 measurements (9.00%) + 6 (6.00%) high mild + 3 (3.00%) high severe + +ser_matrix_fpmap_bench time: [3.2526 ms 3.2739 ms 3.2977 ms] +Found 12 outliers among 100 measurements (12.00%) + 2 (2.00%) high mild + 10 (10.00%) high severe + +par_matrix_fpmap_bench time: [10.604 ms 10.765 ms 10.937 ms] +Found 11 outliers among 100 measurements (11.00%) + 8 (8.00%) high mild + 3 (3.00%) high severe + +ser_matrix_reduce_bench time: [2.6748 ms 2.6964 ms 2.7201 ms] +Found 9 outliers among 100 measurements (9.00%) + 6 (6.00%) high mild + 3 (3.00%) high severe + +par_matrix_reduce_bench 
time: [6.2453 ms 6.3198 ms 6.4034 ms] +Found 6 outliers among 100 measurements (6.00%) + 4 (4.00%) high mild + 2 (2.00%) high severe diff --git a/benches/parallel_rayon/matrix_benchmark.rs b/benches/parallel_rayon/matrix_benchmark.rs new file mode 100644 index 00000000..86b39bc9 --- /dev/null +++ b/benches/parallel_rayon/matrix_benchmark.rs @@ -0,0 +1,103 @@ +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use peroxide::{ + fuga::*, + traits::math::{ParallelInnerProduct, ParallelNormed}, +}; + +pub fn par_matrix_from_index_benchmark(cr: &mut Criterion) { + let f = |x: usize, y: usize| 2.0 * (x as f64) * (y as f64); + let size: (usize, usize) = (1000, 1000); + + // Result: 1000x1000 matrix: 2.3662 ms + cr.bench_function("ser_matrix_from_index_bench", |b| { + b.iter(|| black_box(Matrix::from_index(f, size))) + }); + + // Result: 1000x1000 matrix: 2.3355 ms + cr.bench_function("par_matrix_from_index_bench", |b| { + b.iter(|| black_box(Matrix::from_index(f, size))) + }); +} + +// Check: better parallel results (ran test 6 times) +pub fn par_matrix_norm_lpq_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: [5.5969 ms 5.7555 ms 5.9515 ms 6.0843 ms 6.3072 ms 6.5636 ms] + cr.bench_function("ser_matrix_norm_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).norm(Norm::Lpq(4.0, 2.0)))) + }); + + // Result: 1000x1000 matrix: [3.1796 ms 3.2714 ms 3.3714 ms 3.6123 ms 3.7398 ms 3.8761 ms] + cr.bench_function("par_matrix_norm_bench", |b| { + b.iter(|| { + black_box(matrix(v.clone(), 1000, 1000, Shape::Row).par_norm(Norm::Lpq(4.0, 2.0))) + }) + }); +} + +pub fn par_matrix_norm_l1_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: 9.0287 ms + cr.bench_function("ser_matrix_norm_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).norm(Norm::L1))) + }); + + // Result: 1000x1000 matrix: 10.393 ms + cr.bench_function("par_matrix_norm_bench", |b| { + b.iter(|| black_box(matrix(v.clone(), 1000, 1000, Shape::Row).par_norm(Norm::L1))) + }); +} + +// Check: better parallel results (ran test 6 times) +pub fn par_matrix_inner_prod_benchmark(cr: &mut Criterion) { + let v: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 2.0 * (i as f64)) + .collect::>(); + + let w: Vec = (0..1000000) + .into_iter() + .map(|i: i32| 3.0 * (i as f64)) + .collect::>(); + + // Result: 1000x1000 matrix: [5.1075 ms 5.1505 ms 5.2013 ms 5.7617 ms 6.0196 ms 6.3009 ms] + cr.bench_function("ser_matrix_inner_prod_bench", |b| { + b.iter(|| { + black_box(matrix(v.clone(), 1000, 1000, Shape::Row).dot(&matrix( + w.clone(), + 1000, + 1000, + Shape::Row, + ))) + }) + }); + + // Result: 1000x1000 matrix: [4.9931 ms 5.0244 ms 5.0642 ms 5.0322 ms 5.0819 ms 5.1404 ms] + cr.bench_function("par_matrix_inner_prod_bench", |b| { + b.iter(|| { + black_box(matrix(v.clone(), 1000, 1000, Shape::Row).par_dot(&matrix( + w.clone(), + 1000, + 1000, + Shape::Row, + ))) + }) + }); +} + +criterion_group!( + benches, + par_matrix_from_index_benchmark, + par_matrix_norm_lpq_benchmark, + par_matrix_norm_l1_benchmark, + par_matrix_inner_prod_benchmark, +); +criterion_main!(benches); diff --git a/src/lib.rs b/src/lib.rs index e1929555..916878b5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -208,3 +208,5 @@ pub mod util; #[cfg(feature = "complex")] pub mod complex; + +extern crate rayon; 
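
> Note (not part of the patch): in `matrix_benchmark.rs` above, both the `ser_matrix_from_index_bench` and `par_matrix_from_index_bench` closures call the serial `Matrix::from_index`, which matches the nearly identical timings in the results file (2.45 ms vs 2.41 ms). A minimal sketch of the comparison once the parallel closure targets the `Matrix::par_from_index` constructor introduced later in this diff:

```rust
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use peroxide::fuga::*;

// Sketch: benchmark the serial and parallel constructors against each other.
pub fn matrix_from_index_benchmark(cr: &mut Criterion) {
    let f = |x: usize, y: usize| 2.0 * (x as f64) * (y as f64);
    let size: (usize, usize) = (1000, 1000);

    cr.bench_function("ser_matrix_from_index_bench", |b| {
        b.iter(|| black_box(Matrix::from_index(f, size)))
    });

    // Uses the parallel constructor added in src/structure/matrix.rs below.
    cr.bench_function("par_matrix_from_index_bench", |b| {
        b.iter(|| black_box(Matrix::par_from_index(f, size)))
    });
}

criterion_group!(benches, matrix_from_index_benchmark);
criterion_main!(benches);
```

With the `[[bench]]` entry registered in `Cargo.toml` above (`harness = false`), Criterion runs this via `cargo bench --bench matrix_benchmark`.
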
diff --git a/src/numerical/integral.rs b/src/numerical/integral.rs index ac2165dd..d7650f3c 100644 --- a/src/numerical/integral.rs +++ b/src/numerical/integral.rs @@ -166,7 +166,7 @@ where match method { Integral::GaussLegendre(n) => gauss_legendre_quadrature(f, n, (a, b)), Integral::NewtonCotes(n) => newton_cotes_quadrature(f, n, (a, b)), - method => gauss_kronrod_quadrature(f, (a,b), method), + method => gauss_kronrod_quadrature(f, (a, b), method), } } @@ -217,9 +217,9 @@ where #[allow(non_snake_case)] pub fn gauss_kronrod_quadrature(f: F, (a, b): (T, S), method: Integral) -> f64 where - F: Fn(f64) -> f64 + Copy, - T: Into, - S: Into, + F: Fn(f64) -> f64 + Copy, + T: Into, + S: Into, { let (g, k) = method.get_gauss_kronrod_order(); let tol = method.get_tol(); @@ -234,13 +234,9 @@ where let G = gauss_legendre_quadrature(f, g as usize, (a, b)); let K = kronrod_quadrature(f, k as usize, (a, b)); let c = (a + b) / 2f64; - let tol_curr = if method.is_relative() { - tol * G - } else { - tol - }; + let tol_curr = if method.is_relative() { tol * G } else { tol }; if (G - K).abs() < tol_curr || a == b || max_iter == 0 { - if ! G.is_finite() { + if !G.is_finite() { return G; } I += G; @@ -255,11 +251,11 @@ where I } -pub fn kronrod_quadrature(f: F, n: usize, (a, b): (f64, f64)) -> f64 +pub fn kronrod_quadrature(f: F, n: usize, (a, b): (f64, f64)) -> f64 where F: Fn(f64) -> f64, { - (b - a) / 2f64 * unit_kronrod_quadrature(|x| f(x * (b-a) / 2f64 + (a + b) / 2f64), n) + (b - a) / 2f64 * unit_kronrod_quadrature(|x| f(x * (b - a) / 2f64 + (a + b) / 2f64), n) } // ============================================================================= @@ -382,9 +378,9 @@ fn unit_kronrod_quadrature(f: F, n: usize) -> f64 where F: Fn(f64) -> f64, { - let (a,x) = kronrod_table(n); + let (a, x) = kronrod_table(n); let mut s = 0f64; - for i in 0 .. a.len() { + for i in 0..a.len() { s += a[i] * f(x[i]); } s @@ -414,28 +410,28 @@ fn kronrod_table(n: usize) -> (Vec, Vec) { match n % 2 { 0 => { - for i in 0 .. ref_node.len() { + for i in 0..ref_node.len() { result_node[i] = ref_node[i]; result_weight[i] = ref_weight[i]; } - for i in ref_node.len() .. n { - result_node[i] = -ref_node[n-i-1]; - result_weight[i] = ref_weight[n-i-1]; + for i in ref_node.len()..n { + result_node[i] = -ref_node[n - i - 1]; + result_weight[i] = ref_weight[n - i - 1]; } } 1 => { - for i in 0 .. ref_node.len() { + for i in 0..ref_node.len() { result_node[i] = ref_node[i]; result_weight[i] = ref_weight[i]; } - for i in ref_node.len() .. 
n { - result_node[i] = -ref_node[n-i]; - result_weight[i] = ref_weight[n-i]; + for i in ref_node.len()..n { + result_node[i] = -ref_node[n - i]; + result_weight[i] = ref_weight[n - i]; } } - _ => unreachable!() + _ => unreachable!(), } (result_weight, result_node) } @@ -922,7 +918,7 @@ const LEGENDRE_WEIGHT_25: [f64; 13] = [ 0.054904695975835192, 0.040939156701306313, 0.026354986615032137, - 0.011393798501026288, + 0.011393798501026288, ]; const LEGENDRE_WEIGHT_26: [f64; 13] = [ 0.118321415279262277, diff --git a/src/prelude/simpler.rs b/src/prelude/simpler.rs index 819bc38f..c2c0c7d7 100644 --- a/src/prelude/simpler.rs +++ b/src/prelude/simpler.rs @@ -1,5 +1,3 @@ -#[cfg(feature="parquet")] -use std::error::Error; use crate::numerical::{ eigen, eigen::{Eigen, EigenMethod::Jacobi}, @@ -8,13 +6,15 @@ use crate::numerical::{ spline, spline::{CubicHermiteSpline, SlopeMethod::Quadratic}, }; +#[cfg(feature = "parquet")] +use crate::structure::dataframe::{DataFrame, WithParquet}; use crate::structure::matrix::{self, Matrix}; use crate::structure::polynomial; use crate::traits::math::{Norm, Normed}; -#[cfg(feature="parquet")] -use crate::structure::dataframe::{DataFrame, WithParquet}; -#[cfg(feature="parquet")] +#[cfg(feature = "parquet")] use arrow2::io::parquet::write::CompressionOptions; +#[cfg(feature = "parquet")] +use std::error::Error; /// Simple Norm pub trait SimpleNorm: Normed { @@ -35,7 +35,7 @@ pub trait SimplerLinearAlgebra { fn waz_diag(&self) -> Option; fn waz(&self) -> Option; fn qr(&self) -> matrix::QR; - #[cfg(feature="O3")] + #[cfg(feature = "O3")] fn cholesky(&self) -> Matrix; fn rref(&self) -> Matrix; fn det(&self) -> f64; @@ -99,7 +99,7 @@ impl SimplerLinearAlgebra for Matrix { matrix::LinearAlgebra::qr(self) } - #[cfg(feature="O3")] + #[cfg(feature = "O3")] fn cholesky(&self) -> Matrix { matrix::LinearAlgebra::cholesky(self, matrix::UPLO::Lower) } @@ -161,7 +161,7 @@ use crate::special::function::{ /// Returns [`NAN`](f64::NAN) if the given input is smaller than -1/e (≈ -0.36787944117144233). /// /// Accurate to 50 bits. -/// +/// /// Wrapper of `lambert_w_0` function of `lambert_w` crate. /// /// # Reference @@ -176,7 +176,7 @@ pub fn lambert_w0(z: f64) -> f64 { /// Returns [`NAN`](f64::NAN) if the given input is positive or smaller than -1/e (≈ -0.36787944117144233). /// /// Accurate to 50 bits. -/// +/// /// Wrapper of `lambert_w_m1` function of `lambert_w` crate. 
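
> Usage reference for the dispatch reformatted in `src/numerical/integral.rs` above — a minimal sketch assuming the module's public `integrate` entry point (the function containing the `match method` shown in the first hunk). Only `GaussLegendre` and `NewtonCotes` appear verbatim there; every other `Integral` variant falls through to the adaptive `gauss_kronrod_quadrature` routine.

```rust
use peroxide::numerical::integral::{integrate, Integral};

fn main() {
    // Integral of x^2 over [0, 1]; exact value is 1/3.
    let f = |x: f64| x.powi(2);

    // Fixed-order Gauss-Legendre rule, dispatched directly in the match above.
    let gl = integrate(f, (0f64, 1f64), Integral::GaussLegendre(16));

    // Any other Integral variant is routed to gauss_kronrod_quadrature(f, (a, b), method),
    // which bisects until the Gauss and Kronrod estimates agree within the method's tolerance.
    println!("Gauss-Legendre(16): {}", gl);
}
```
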
/// /// # Reference @@ -187,13 +187,13 @@ pub fn lambert_wm1(z: f64) -> f64 { } /// Simple handle parquet -#[cfg(feature="parquet")] +#[cfg(feature = "parquet")] pub trait SimpleParquet: Sized { fn write_parquet(&self, path: &str) -> Result<(), Box>; fn read_parquet(path: &str) -> Result>; } -#[cfg(feature="parquet")] +#[cfg(feature = "parquet")] impl SimpleParquet for DataFrame { fn write_parquet(&self, path: &str) -> Result<(), Box> { WithParquet::write_parquet(self, path, CompressionOptions::Uncompressed) diff --git a/src/statistics/dist.rs b/src/statistics/dist.rs index b409c197..ee150aca 100644 --- a/src/statistics/dist.rs +++ b/src/statistics/dist.rs @@ -234,15 +234,15 @@ use self::rand::distributions::uniform::SampleUniform; use self::rand::prelude::*; pub use self::OPDist::*; pub use self::TPDist::*; -use crate::traits::fp::FPVector; use crate::special::function::*; +use crate::traits::fp::FPVector; //use statistics::rand::ziggurat; +use self::WeightedUniformError::*; use crate::statistics::{ops::C, stat::Statistics}; use crate::util::non_macro::{linspace, seq}; use crate::util::useful::{auto_zip, find_interval}; +use anyhow::{bail, Result}; use std::f64::consts::E; -use self::WeightedUniformError::*; -use anyhow::{Result, bail}; /// One parameter distribution /// @@ -271,7 +271,7 @@ pub enum TPDist> { pub struct WeightedUniform> { weights: Vec, sum: T, - intervals: Vec<(T, T)> + intervals: Vec<(T, T)>, } #[derive(Debug, Clone, Copy)] @@ -286,7 +286,9 @@ impl std::fmt::Display for WeightedUniformError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { WeightedUniformError::AllZeroWeightError => write!(f, "all weights are zero"), - WeightedUniformError::LengthMismatchError => write!(f, "weights and intervals have different length"), + WeightedUniformError::LengthMismatchError => { + write!(f, "weights and intervals have different length") + } WeightedUniformError::NoNonZeroIntervalError => write!(f, "no non-zero interval found"), WeightedUniformError::EmptyWeightError => write!(f, "weights are empty"), } @@ -295,11 +297,11 @@ impl std::fmt::Display for WeightedUniformError { impl WeightedUniform { /// Create a new weighted uniform distribution - /// + /// /// # Examples /// ``` /// use peroxide::fuga::*; - /// + /// /// fn main() -> Result<(), Box> { /// let weights = vec![1f64, 3f64, 0f64, 2f64]; /// let intervals = vec![0f64, 1f64, 2f64, 4f64, 5f64]; @@ -336,32 +338,31 @@ impl WeightedUniform { } } - let sum = weights.iter() + let sum = weights + .iter() .zip(intervals.iter()) .fold(0f64, |acc, (w, (a, b))| acc + w * (b - a)); - Ok( - WeightedUniform { - weights, - sum, - intervals - } - ) + Ok(WeightedUniform { + weights, + sum, + intervals, + }) } /// Create WeightedUniform from max pooling - /// + /// /// # Examples /// ``` /// use peroxide::fuga::*; - /// + /// /// fn main() -> Result<(), Box> { /// let w = WeightedUniform::from_max_pool_1d(f, (-2f64, 3f64), 10, 1e-3)?; /// w.weights().print(); /// /// Ok(()) /// } - /// + /// /// fn f(x: f64) -> f64 { /// if x.abs() < 1f64 { /// 1f64 - x.abs() @@ -371,40 +372,39 @@ impl WeightedUniform { /// } /// ``` pub fn from_max_pool_1d(f: F, (a, b): (f64, f64), n: usize, eps: f64) -> Result - where F: Fn(f64) -> f64 + Copy { + where + F: Fn(f64) -> f64 + Copy, + { // Find non-zero intervals let mut a = a; let mut b = b; let trial = seq(a, b, eps); - for i in 0 .. 
trial.len() { + for i in 0..trial.len() { let x = trial[i]; if f(x) > 0f64 { - a = if i > 0 { trial[i-1] } else { x }; + a = if i > 0 { trial[i - 1] } else { x }; break; } } - for i in (0 .. trial.len()).rev() { + for i in (0..trial.len()).rev() { let x = trial[i]; if f(x) > 0f64 { - b = if i < trial.len() - 1 { trial[i+1] } else { x }; + b = if i < trial.len() - 1 { trial[i + 1] } else { x }; break; } } if a >= b { bail!(NoNonZeroIntervalError); } - let domain = linspace(a, b, n+1); + let domain = linspace(a, b, n + 1); // Find intervals let intervals = auto_zip(&domain); // Find weights - let weights: Vec = intervals.iter() - .map( - |(a, b)| { - seq(*a, *b+eps, eps).reduce(0f64, |acc, x| acc.max(f(x))) - } - ) + let weights: Vec = intervals + .iter() + .map(|(a, b)| seq(*a, *b + eps, eps).reduce(0f64, |acc, x| acc.max(f(x)))) .collect(); Self::new(weights, domain) @@ -419,11 +419,19 @@ impl WeightedUniform { } pub fn domain_linspace(&self, n: usize) -> Vec { - linspace(self.intervals[0].0, self.intervals[self.intervals.len()-1].1, n) + linspace( + self.intervals[0].0, + self.intervals[self.intervals.len() - 1].1, + n, + ) } pub fn domain_seq(&self, step: f64) -> Vec { - seq(self.intervals[0].0, self.intervals[self.intervals.len()-1].1, step) + seq( + self.intervals[0].0, + self.intervals[self.intervals.len() - 1].1, + step, + ) } pub fn sum(&self) -> f64 { @@ -433,15 +441,19 @@ impl WeightedUniform { pub fn update_weights(&mut self, weights: Vec) { assert_eq!(self.intervals.len(), weights.len()); self.weights = weights; - self.sum = self.weights.iter() + self.sum = self + .weights + .iter() .zip(self.intervals.iter()) .fold(0f64, |acc, (w, (a, b))| acc + w * (b - a)); } pub fn update_intervals(&mut self, intervals: Vec) { - assert_eq!(self.weights.len()+1, intervals.len()); + assert_eq!(self.weights.len() + 1, intervals.len()); self.intervals = auto_zip(&intervals); - self.sum = self.weights.iter() + self.sum = self + .weights + .iter() .zip(self.intervals.iter()) .fold(0f64, |acc, (w, (a, b))| acc + w * (b - a)); } @@ -493,7 +505,11 @@ impl> ParametricDist for Weight fn params(&self) -> Self::Parameter { let weights = self.weights.iter().map(|x| (*x).into()).collect(); - let intervals = self.intervals.iter().map(|(x, y)| ((*x).into(), (*y).into())).collect(); + let intervals = self + .intervals + .iter() + .map(|(x, y)| ((*x).into(), (*y).into())) + .collect(); (weights, intervals) } } @@ -619,11 +635,7 @@ impl> RNG for TPDist { Binomial(num, mu) => { let binom = rand_distr::Binomial::new(*num as u64, (*mu).into()).unwrap(); - binom - .sample_iter(rng) - .take(n) - .map(|t| t as f64) - .collect() + binom.sample_iter(rng).take(n).map(|t| t as f64).collect() } Normal(m, s) => { @@ -789,10 +801,12 @@ impl RNG for WeightedUniform { let ics: Vec = w.sample_iter(&mut rng_clip).take(n).collect(); *rng = rng_clip; - ics.into_iter().map(|idx| { - let (l, r) = self.intervals[idx]; - rng.gen_range(l ..=r) - }).collect::>() + ics.into_iter() + .map(|idx| { + let (l, r) = self.intervals[idx]; + rng.gen_range(l..=r) + }) + .collect::>() } fn pdf>(&self, x: S) -> f64 { @@ -812,11 +826,11 @@ impl RNG for WeightedUniform { return 1f64; } let idx = find_interval(self.intervals(), x); - self.weights[0 ..=idx].iter() - .zip(self.intervals[0 ..=idx].iter()) - .fold(0f64, |acc, (w, (a, b))| { - acc + w * (b - a) - }) / self.sum + self.weights[0..=idx] + .iter() + .zip(self.intervals[0..=idx].iter()) + .fold(0f64, |acc, (w, (a, b))| acc + w * (b - a)) + / self.sum } } @@ -912,16 +926,23 @@ impl Statistics 
for WeightedUniform { type Value = f64; fn mean(&self) -> Self::Value { - self.intervals().iter().zip(self.weights().iter()) - .map(|((l, r), w)| (r.powi(2)-l.powi(2))/2f64 * w) - .sum::() / self.sum + self.intervals() + .iter() + .zip(self.weights().iter()) + .map(|((l, r), w)| (r.powi(2) - l.powi(2)) / 2f64 * w) + .sum::() + / self.sum } fn var(&self) -> Self::Value { let mean = self.mean(); - self.intervals().iter().zip(self.weights().iter()) + self.intervals() + .iter() + .zip(self.weights().iter()) .map(|((l, r), w)| w * (r.powi(3) - l.powi(3)) / 3f64) - .sum::() / self.sum - mean * mean + .sum::() + / self.sum + - mean * mean } fn sd(&self) -> Self::Value { diff --git a/src/statistics/rand.rs b/src/statistics/rand.rs index 36590085..1e577fbe 100644 --- a/src/statistics/rand.rs +++ b/src/statistics/rand.rs @@ -24,9 +24,9 @@ extern crate rand; use self::rand::distributions::uniform::SampleUniform; use self::rand::prelude::*; +use crate::statistics::dist::{WeightedUniform, RNG}; #[allow(unused_imports)] use crate::structure::matrix::*; -use crate::statistics::dist::{RNG, WeightedUniform}; /// Small random number generator from seed /// @@ -466,7 +466,9 @@ pub fn ziggurat(rng: &mut ThreadRng, sigma: f64) -> f64 { /// Ok(()) /// } pub fn prs(f: F, n: usize, (a, b): (f64, f64), m: usize, eps: f64) -> anyhow::Result> -where F: Fn(f64) -> f64 + Copy { +where + F: Fn(f64) -> f64 + Copy, +{ let mut rng = thread_rng(); let mut result = vec![0f64; n]; @@ -482,7 +484,7 @@ where F: Fn(f64) -> f64 + Copy { if weight <= 0f64 { continue; } else { - let y = rng.gen_range(0f64 ..=weight); + let y = rng.gen_range(0f64..=weight); if y <= f(x) { result[n - left_num] = x; @@ -527,8 +529,17 @@ where F: Fn(f64) -> f64 + Copy { /// /// Ok(()) /// } -pub fn prs_with_rng(f: F, n: usize, (a, b): (f64, f64), m: usize, eps: f64, rng: &mut R) -> anyhow::Result> - where F: Fn(f64) -> f64 + Copy { +pub fn prs_with_rng( + f: F, + n: usize, + (a, b): (f64, f64), + m: usize, + eps: f64, + rng: &mut R, +) -> anyhow::Result> +where + F: Fn(f64) -> f64 + Copy, +{ let mut result = vec![0f64; n]; let w = WeightedUniform::from_max_pool_1d(f, (a, b), m, eps)?; @@ -542,7 +553,7 @@ pub fn prs_with_rng(f: F, n: usize, (a, b): (f64, f64), m: us if weight <= 0f64 { continue; } else { - let y = rng.gen_range(0f64 ..=weight); + let y = rng.gen_range(0f64..=weight); if y <= f(x) { result[n - left_num] = x; diff --git a/src/structure/matrix.rs b/src/structure/matrix.rs index 797792ca..226e7958 100644 --- a/src/structure/matrix.rs +++ b/src/structure/matrix.rs @@ -192,7 +192,7 @@ //! To write columns or rows, `DataFrame` and `nc` feature could be the best choice. //! //! * `nc` feature should be required - `netcdf` or `libnetcdf` are prerequisites. -//! +//! //! ```no_run //! #[macro_use] //! extern crate peroxide; @@ -596,10 +596,10 @@ //! } //! 
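
> For reference, the `Statistics` impl for `WeightedUniform` in `src/statistics/dist.rs` above evaluates the exact moments of the piecewise-constant density it represents. With weights $w_i$ on intervals $(l_i, r_i)$ and the normalizing sum $S$ maintained by `new` and `update_weights`:

$$ S = \sum_i w_i\,(r_i - l_i), \qquad \mathbb{E}[X] = \frac{1}{S} \sum_i w_i\, \frac{r_i^2 - l_i^2}{2}, \qquad \operatorname{Var}(X) = \frac{1}{S} \sum_i w_i\, \frac{r_i^3 - l_i^3}{3} - \mathbb{E}[X]^2 $$

which is exactly what `mean` and `var` accumulate term by term before dividing by `self.sum`.
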
``` -#[cfg(feature="csv")] +#[cfg(feature = "csv")] extern crate csv; -#[cfg(feature="csv")] +#[cfg(feature = "csv")] use self::csv::{ReaderBuilder, StringRecord, WriterBuilder}; #[cfg(feature = "O3")] @@ -609,30 +609,32 @@ extern crate lapack; #[cfg(feature = "O3")] use blas::{daxpy, dgemm, dgemv}; #[cfg(feature = "O3")] -use lapack::{dgecon, dgeqrf, dgetrf, dgetri, dgetrs, dorgqr, dgesvd, dpotrf}; +use lapack::{dgecon, dgeqrf, dgesvd, dgetrf, dgetri, dgetrs, dorgqr, dpotrf}; +use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; pub use self::Shape::{Col, Row}; use crate::numerical::eigen::{eigen, EigenMethod}; +use crate::structure::dataframe::{Series, TypedVector}; +use crate::traits::math::{ParallelInnerProduct, ParallelNormed}; +use crate::traits::sugar::ScalableMut; use crate::traits::{ - general::Algorithm, fp::{FPMatrix, FPVector}, + general::Algorithm, math::{InnerProduct, LinearOp, MatrixProduct, Norm, Normed, Vector}, mutable::MutMatrix, }; use crate::util::{ - low_level::{swap_vec_ptr, copy_vec_ptr}, + low_level::{copy_vec_ptr, swap_vec_ptr}, non_macro::{cbind, eye, rbind, zeros}, useful::{nearly_eq, tab}, }; -use crate::structure::dataframe::{Series, TypedVector}; +use peroxide_num::{ExpLogOps, Numeric, PowOps, TrigOps}; use std::cmp::{max, min}; pub use std::error::Error; use std::fmt; use std::ops::{Add, Div, Index, IndexMut, Mul, Neg, Sub}; -use crate::traits::sugar::ScalableMut; -use peroxide_num::{ExpLogOps, PowOps, TrigOps, Numeric}; pub type Perms = Vec<(usize, usize)>; @@ -1143,7 +1145,7 @@ impl Matrix { /// Ok(()) /// } /// ``` - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn write(&self, file_path: &str) -> Result<(), Box> { let mut wtr = WriterBuilder::new().from_path(file_path)?; let r = self.row; @@ -1174,7 +1176,7 @@ impl Matrix { /// Ok(()) /// } /// ``` - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn write_round(&self, file_path: &str, round: usize) -> Result<(), Box> { let mut wtr = WriterBuilder::new().from_path(file_path)?; let r = self.row; @@ -1190,7 +1192,7 @@ impl Matrix { Ok(()) } - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn write_with_header( &self, file_path: &str, @@ -1212,7 +1214,7 @@ impl Matrix { Ok(()) } - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn write_with_header_round( &self, file_path: &str, @@ -1260,7 +1262,7 @@ impl Matrix { /// Ok(()) /// } /// ``` - #[cfg(feature="csv")] + #[cfg(feature = "csv")] pub fn read(file_path: &str, header: bool, delimiter: char) -> Result> { let mut rdr = ReaderBuilder::new() .has_headers(header) @@ -1340,6 +1342,27 @@ impl Matrix { mat } + /// From index operations in parallel + pub fn par_from_index(f: F, size: (usize, usize)) -> Matrix + where + F: Fn(usize, usize) -> G + Copy + Send + Sync, + G: Into, + { + let row = size.0; + let col = size.1; + + let data = (0..row) + .into_par_iter() + .flat_map(|i| { + (0..col) + .into_par_iter() + .map(|j| f(i, j).into()) + .collect::>() + }) + .collect::>(); + matrix(data, row, col, Row) + } + /// Matrix to `Vec>` /// /// To send `Matrix` to `inline-python` @@ -1365,7 +1388,7 @@ impl Matrix { /// /// # Description /// Return below elements of matrix to new matrix - /// + /// /// $$ /// \begin{pmatrix} /// \\ddots & & & & \\\\ @@ -1380,7 +1403,7 @@ impl Matrix { /// ``` /// extern crate peroxide; /// use peroxide::fuga::*; - /// + /// /// fn main() { /// let a = ml_matrix("1 2 3;4 5 6;7 8 9"); /// let b = ml_matrix("5 6;8 9"); @@ 
-1392,8 +1415,8 @@ impl Matrix { let row = end.0 - start.0 + 1; let col = end.1 - start.1 + 1; let mut result = matrix(vec![0f64; row * col], row, col, self.shape); - for i in 0 .. row { - for j in 0 .. col { + for i in 0..row { + for j in 0..col { result[(i, j)] = self[(start.0 + i, start.1 + j)]; } } @@ -1404,7 +1427,7 @@ impl Matrix { /// /// # Description /// Substitute below elements of matrix - /// + /// /// $$ /// \begin{pmatrix} /// \\ddots & & & & \\\\ @@ -1414,12 +1437,12 @@ impl Matrix { /// & & & & \\ddots /// \end{pmatrix} /// $$ - /// + /// /// # Examples /// ``` /// extern crate peroxide; /// use peroxide::fuga::*; - /// + /// /// fn main() { /// let mut a = ml_matrix("1 2 3;4 5 6;7 8 9"); /// let b = ml_matrix("1 2;3 4"); @@ -1431,8 +1454,8 @@ impl Matrix { pub fn subs_mat(&mut self, start: (usize, usize), end: (usize, usize), m: &Matrix) { let row = end.0 - start.0 + 1; let col = end.1 - start.1 + 1; - for i in 0 .. row { - for j in 0 .. col { + for i in 0..row { + for j in 0..col { self[(start.0 + i, start.1 + j)] = m[(i, j)]; } } @@ -1616,6 +1639,71 @@ impl Normed for Matrix { } } +impl ParallelNormed for Matrix { + type UnsignedScalar = f64; + + fn par_norm(&self, kind: Norm) -> f64 { + match kind { + Norm::F => { + let mut s = 0f64; + for i in 0..self.data.len() { + s += self.data[i].powi(2); + } + s.sqrt() + } + Norm::Lpq(p, q) => { + let s = (0..self.col) + .into_par_iter() + .map(|j| { + let s_row = (0..self.row) + .into_iter() + .map(|i| self[(i, j)].powi(p as i32)) + .sum::(); + s_row.powf(q / p) + }) + .sum::(); + s.powf(1f64 / q) + } + Norm::L1 => { + let mut m = f64::MIN; + match self.shape { + Row => self.change_shape().norm(Norm::L1), + Col => { + for c in 0..self.col { + let s = self.col(c).par_iter().sum(); + if s > m { + m = s; + } + } + m + } + } + } + Norm::LInf => { + let mut m = f64::MIN; + match self.shape { + Col => self.change_shape().norm(Norm::LInf), + Row => { + for r in 0..self.row { + let s = self.row(r).iter().sum(); + if s > m { + m = s; + } + } + m + } + } + } + Norm::L2 => { + let a = &self.t() * self; + let eig = eigen(&a, EigenMethod::Jacobi); + eig.eigenvalue.norm(Norm::LInf) + } + Norm::Lp(_) => unimplemented!(), + } + } +} + /// Frobenius inner product impl InnerProduct for Matrix { fn dot(&self, rhs: &Self) -> f64 { @@ -1627,6 +1715,17 @@ impl InnerProduct for Matrix { } } +/// Frobenius inner product in parallel +impl ParallelInnerProduct for Matrix { + fn par_dot(&self, rhs: &Self) -> f64 { + if self.shape == rhs.shape { + self.data.par_dot(&rhs.data) + } else { + self.data.par_dot(&rhs.change_shape().data) + } + } +} + /// TODO: Transpose /// Matrix as Linear operator for Vector @@ -2918,7 +3017,7 @@ impl SVD { pub fn s_mat(&self) -> Matrix { let mut mat = zeros(self.u.col, self.vt.row); - for i in 0 .. 
mat.row.min(mat.col) { + for i in 0..mat.row.min(mat.col) { mat[(i, i)] = self.s[i]; } mat @@ -2961,11 +3060,7 @@ impl SVD { } } - SVD { - s, - u, - vt, - } + SVD { s, u, vt } } } @@ -3151,7 +3246,7 @@ impl LinearAlgebra for Matrix { #[allow(non_snake_case)] fn qr(&self) -> QR { match () { - #[cfg(feature="O3")] + #[cfg(feature = "O3")] () => { let opt_dgeqrf = lapack_dgeqrf(self); match opt_dgeqrf { @@ -3159,10 +3254,7 @@ impl LinearAlgebra for Matrix { Some(dgeqrf) => { let q = dgeqrf.get_Q(); let r = dgeqrf.get_R(); - QR { - q, - r - } + QR { q, r } } } } @@ -3209,7 +3301,7 @@ impl LinearAlgebra for Matrix { /// ``` fn svd(&self) -> SVD { match () { - #[cfg(feature="O3")] + #[cfg(feature = "O3")] () => { let opt_dgesvd = lapack_dgesvd(self); match opt_dgesvd { @@ -3218,14 +3310,12 @@ impl LinearAlgebra for Matrix { SVD_STATUS::Diverge(i) => { panic!("Divergent occurs in SVD - {} iterations", i) } - SVD_STATUS::Success => { - SVD { - s: dgesvd.s, - u: dgesvd.u, - vt: dgesvd.vt, - } - } - } + SVD_STATUS::Success => SVD { + s: dgesvd.s, + u: dgesvd.u, + vt: dgesvd.vt, + }, + }, } } _ => { @@ -3465,7 +3555,9 @@ impl LinearAlgebra for Matrix { None => panic!("Singular matrix (opt_dgrf)"), Some(dgrf) => match dgrf.status { LAPACK_STATUS::NonSingular => lapack_dgetri(&dgrf).unwrap(), - LAPACK_STATUS::Singular => panic!("Singular matrix (LAPACK_STATUS Singular)"), + LAPACK_STATUS::Singular => { + panic!("Singular matrix (LAPACK_STATUS Singular)") + } }, } } @@ -3500,13 +3592,13 @@ impl LinearAlgebra for Matrix { /// ``` fn pseudo_inv(&self) -> Self { match () { - #[cfg(feature="O3")] + #[cfg(feature = "O3")] () => { let svd = self.svd(); let row = svd.vt.row; let col = svd.u.row; let mut sp = zeros(row, col); - for i in 0 .. row.min(col) { + for i in 0..row.min(col) { sp[(i, i)] = 1f64 / svd.s[i]; } svd.vt.t() * sp * svd.u.t() @@ -3620,9 +3712,9 @@ impl LinearAlgebra for Matrix { return false; } - for i in 0 .. self.row { - for j in i .. 
self.col { - if !nearly_eq(self[(i,j)], self[(j,i)]) { + for i in 0..self.row { + for j in i..self.col { + if !nearly_eq(self[(i, j)], self[(j, i)]) { return false; } } @@ -3722,7 +3814,6 @@ impl Div for Matrix { } } - impl ExpLogOps for Matrix { type Float = f64; fn exp(&self) -> Self { @@ -4248,7 +4339,7 @@ pub enum POSITIVE_STATUS { #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum UPLO { Upper, - Lower + Lower, } /// Temporary data structure from `dgetrf` @@ -4279,7 +4370,7 @@ pub struct DGESVD { pub struct DPOTRF { pub fact_mat: Matrix, pub uplo: UPLO, - pub status: POSITIVE_STATUS + pub status: POSITIVE_STATUS, } ///// Temporary data structure from `dgeev` @@ -4473,7 +4564,22 @@ pub fn lapack_dgesvd(mat: &Matrix) -> Option { // Workspace query unsafe { - dgesvd(jobu, jobvt, m, n, &mut A.data, lda, &mut s, &mut u, ldu, &mut vt, ldvt, &mut work, lwork, &mut info); + dgesvd( + jobu, + jobvt, + m, + n, + &mut A.data, + lda, + &mut s, + &mut u, + ldu, + &mut vt, + ldvt, + &mut work, + lwork, + &mut info, + ); } let optimal_lwork = work[0] as usize; @@ -4532,35 +4638,23 @@ pub fn lapack_dpotrf(mat: &Matrix, UPLO: UPLO) -> Option { let mut info = 0i32; let uplo = match UPLO { UPLO::Upper => b'U', - UPLO::Lower => b'L' + UPLO::Lower => b'L', }; - unsafe { - dpotrf( - uplo, - N, - &mut A.data, - lda, - &mut info, - ) - } + unsafe { dpotrf(uplo, N, &mut A.data, lda, &mut info) } if info == 0 { - Some( - DPOTRF { - fact_mat: matrix(A.data, mat.row, mat.col, Col), - uplo: UPLO, - status: POSITIVE_STATUS::Success - } - ) + Some(DPOTRF { + fact_mat: matrix(A.data, mat.row, mat.col, Col), + uplo: UPLO, + status: POSITIVE_STATUS::Success, + }) } else if info > 0 { - Some( - DPOTRF { - fact_mat: matrix(A.data, mat.row, mat.col, Col), - uplo: UPLO, - status: POSITIVE_STATUS::Failed(info) - } - ) + Some(DPOTRF { + fact_mat: matrix(A.data, mat.row, mat.col, Col), + uplo: UPLO, + status: POSITIVE_STATUS::Failed(info), + }) } else { None } @@ -4674,8 +4768,8 @@ impl DPOTRF { let mat = &self.fact_mat; let n = mat.col; let mut result = matrix(vec![0f64; n.pow(2)], n, n, mat.shape); - for i in 0 .. n { - for j in i .. n { + for i in 0..n { + for j in i..n { result[(i, j)] = mat[(i, j)]; } } @@ -4691,8 +4785,8 @@ impl DPOTRF { let n = mat.col; let mut result = matrix(vec![0f64; n.pow(2)], n, n, mat.shape); - for i in 0 .. n { - for j in 0 .. 
i+1 { + for i in 0..n { + for j in 0..i + 1 { result[(i, j)] = mat[(i, j)]; } } diff --git a/src/structure/vector.rs b/src/structure/vector.rs index 0e977361..8d584942 100644 --- a/src/structure/vector.rs +++ b/src/structure/vector.rs @@ -267,7 +267,12 @@ extern crate blas; #[cfg(feature = "O3")] use blas::{daxpy, ddot, dnrm2, idamax}; +use crate::rayon::prelude::*; + use crate::structure::matrix::{matrix, Matrix, Row}; +use crate::traits::fp::ParallelFPVector; +use crate::traits::math::{ParallelInnerProduct, ParallelNormed, ParallelVectorProduct}; +use crate::traits::mutable::ParallelMutFP; use crate::traits::{ fp::FPVector, general::Algorithm, @@ -318,7 +323,7 @@ impl FPVector for Vec { fn reduce(&self, init: T, f: F) -> f64 where F: Fn(f64, f64) -> f64, - T: Into, + T: Into + Copy, { self.iter().fold(init.into(), |x, &y| f(x, y)) } @@ -409,6 +414,93 @@ impl FPVector for Vec { } } +impl ParallelFPVector for Vec { + type Scalar = f64; + + /// par_fmap for `Vec` + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// use peroxide::traits::fp::ParallelFPVector; + /// + /// fn main() { + /// let a = c!(1,2,3,4,5); + /// assert_eq!(a.par_fmap(|x| x*2f64), seq!(2,10,2)); + /// } + /// ``` + fn par_fmap(&self, f: F) -> Vec + where + F: Fn(f64) -> f64 + Sync + Send, + { + let mut v = self.clone(); + v.par_iter_mut().for_each(|x| *x = f(*x)); + v + } + + /// reduce for `Vec` + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// use peroxide::traits::fp::ParallelFPVector; + /// + /// fn main() { + /// let a = seq!(1,100,1); + /// assert_eq!(a.par_reduce(0, |x,y| x + y), 5050f64); + /// } + /// ``` + fn par_reduce(&self, _init: T, f: F) -> f64 + where + F: Fn(f64, f64) -> f64 + Send + Sync, + T: Into + Send + Sync + Copy, + { + self.par_iter() + .cloned() + .reduce_with(|x, y| f(x, y)) + .expect("Unable to perform parallel reduce operation") + } + + fn par_zip_with(&self, f: F, other: &Vec) -> Vec + where + F: Fn(f64, f64) -> f64 + Send + Sync, + { + self.par_iter() + .zip(other) + .map(|(x, y)| f(*x, *y)) + .collect::>() + } + + /// Filter for `Vec` + /// + /// # Examples + /// ``` + /// #[macro_use] + /// extern crate peroxide; + /// use peroxide::fuga::*; + /// use peroxide::traits::fp::ParallelFPVector; + /// + /// fn main() { + /// let a = c!(1,2,3,4,5); + /// let b = a.par_filter(|x| x > 3.); + /// assert_eq!(b, c!(4,5)); + /// } + /// ``` + fn par_filter(&self, f: F) -> Vec + where + F: Fn(f64) -> bool + Send + Sync, + { + self.clone() + .into_par_iter() + .filter(|x| f(*x)) + .collect::>() + } +} + /// Explicit version of `map` pub fn map(f: F, xs: &[T]) -> Vec where @@ -423,6 +515,17 @@ where result } +/// Explicit version of `map` in parallel +pub fn par_map(f: F, xs: &[T]) -> Vec +where + F: Fn(T) -> T + Send + Sync, + T: Default + Copy + Send + Sync, +{ + let mut result = vec![T::default(); xs.len()]; + result.par_iter_mut().for_each(|x| *x = f(*x)); + result +} + /// Explicit version of `reduce` pub fn reduce(f: F, init: T, xs: &[T]) -> T where @@ -434,6 +537,7 @@ where s = f(s, x); } s + // Parallel version: !unimplemented() } /// Explicit version of `zip_with` @@ -450,6 +554,18 @@ where result } +/// Explicit version of `zip_with` in parallel +pub fn par_zip_with(f: F, xs: &[T], ys: &[T]) -> Vec +where + F: Fn(T, T) -> T + Send + Sync, + T: Default + Copy + Send + Sync, +{ + xs.par_iter() + .zip(ys.into_par_iter()) + .map(|(x, y)| f(*x, *y)) + .collect::>() +} + 
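
> Note on the explicit `par_map` helper added in `src/structure/vector.rs` above: as written it fills `result` with `T::default()` and then maps `f` over those defaults, so the input slice `xs` is never read. A minimal sketch of a version that reads from `xs` (the intended behavior is an assumption on my part, mirroring the serial `map` directly above it):

```rust
use rayon::prelude::*;

/// Explicit version of `map` in parallel (sketch: reads from `xs`).
pub fn par_map<F, T>(f: F, xs: &[T]) -> Vec<T>
where
    F: Fn(T) -> T + Send + Sync,
    // `Default` is kept only to match the original signature; it is unused here.
    T: Default + Copy + Send + Sync,
{
    // Map each input element in parallel, preserving order.
    xs.par_iter().map(|&x| f(x)).collect()
}
```
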
impl MutFP for Vec { type Scalar = f64; @@ -472,6 +588,26 @@ impl MutFP for Vec { } } +impl ParallelMutFP for Vec { + type Scalar = f64; + + fn par_mut_map(&mut self, f: F) + where + F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync, + { + self.par_iter_mut().for_each(|x| *x = f(*x)); + } + + fn par_mut_zip_with(&mut self, f: F, other: &Self) + where + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync, + { + self.par_iter_mut() + .zip(other.into_par_iter()) + .for_each(|(x, y)| *x = f(*x, *y)); + } +} + impl Algorithm for Vec { /// Assign rank /// @@ -693,6 +829,51 @@ impl Normed for Vec { } } +impl ParallelNormed for Vec { + type UnsignedScalar = f64; + + fn par_norm(&self, kind: Norm) -> f64 { + match kind { + Norm::L1 => self.iter().map(|x| x.abs()).sum(), + Norm::L2 => { + #[cfg(feature = "O3")] + { + let n_i32 = self.len() as i32; + let res: f64; + unsafe { + res = dnrm2(n_i32, self, 1); + } + res + } + #[cfg(not(feature = "O3"))] + { + self.par_iter() + .map(|x| x.powi(2)) + .sum::() + .sqrt() + } + } + Norm::Lp(p) => { + assert!( + p >= 1f64, + "lp norm is only defined for p>=1, the given value was p={}", + p + ); + self.par_iter() + .map(|x| x.powf(p)) + .sum::() + .powf(1_f64 / p) + } + Norm::LInf => self + .par_iter() + .fold(|| 0f64, |x, y| x.max(y.abs())) + .reduce(|| 0f64, |acc1, acc2| acc1.max(acc2.abs())), + Norm::F => unimplemented!(), + Norm::Lpq(_, _) => unimplemented!(), + } + } +} + impl InnerProduct for Vec { fn dot(&self, rhs: &Self) -> f64 { #[cfg(feature = "O3")] @@ -713,6 +894,27 @@ impl InnerProduct for Vec { } } +impl ParallelInnerProduct for Vec { + fn par_dot(&self, rhs: &Self) -> f64 { + #[cfg(feature = "O3")] + { + let n_i32 = self.len() as i32; + let res: f64; + unsafe { + res = ddot(n_i32, self, 1, rhs, 1); + } + res + } + #[cfg(not(feature = "O3"))] + { + self.par_iter() + .zip(rhs.into_par_iter()) + .fold(|| 0_f64, |x, (y1, y2)| x + y1 * y2) + .sum::() + } + } +} + impl LinearOp, f64> for Vec { fn apply(&self, rhs: &Vec) -> f64 { self.dot(rhs) @@ -729,18 +931,22 @@ impl VectorProduct for Vec { fn cross(&self, other: &Self) -> Self { assert_eq!(self.len(), other.len()); // 2D cross product is ill-defined - if self.len() == 2 { - let mut v = vec![0f64; 1]; - v[0] = self[0] * other[1] - self[1] * other[0]; - v - } else if self.len() == 3 { - let mut v = vec![0f64; 3]; - v[0] = self[1] * other[2] - self[2] * other[1]; - v[1] = self[2] * other[0] - self[0] * other[2]; - v[2] = self[0] * other[1] - self[1] * other[0]; - v - } else { - panic!("Cross product can be defined only in 2 or 3 dimension") + match self.len() { + 2 => { + let mut v = vec![0f64; 1]; + v[0] = self[0] * other[1] - self[1] * other[0]; + v + } + 3 => { + let mut v = vec![0f64; 3]; + v[0] = self[1] * other[2] - self[2] * other[1]; + v[1] = self[2] * other[0] - self[0] * other[2]; + v[2] = self[0] * other[1] - self[1] * other[0]; + v + } + _ => { + panic!("Cross product can be defined only in 2 or 3 dimension") + } } } @@ -751,6 +957,33 @@ impl VectorProduct for Vec { } } +impl ParallelVectorProduct for Vec { + fn par_cross(&self, other: &Self) -> Self { + assert_eq!(self.len(), other.len()); + // 2D cross product is ill-defined + match self.len() { + 2 => { + let mut v = vec![0f64; 1]; + v[0] = self[0] * other[1] - self[1] * other[0]; + v + } + 3 => { + let v = (0..3) + .into_par_iter() + .map(|index| { + self[(index + 1) % 3] * other[(index + 2) % 3] + - self[(index + 2) % 3] * other[(index + 1) % 3] + }) + .collect::>(); + v + } + _ => { + panic!("Cross product can be defined 
only in 2 or 3 dimension") + } + } + } +} + // /// Convenient Vec Operations (No Clone, No Copy) // impl VecOps for Vec { // fn s_add(&self, scala: f64) -> Self { diff --git a/src/traits/fp.rs b/src/traits/fp.rs index 41f11463..07782cb0 100644 --- a/src/traits/fp.rs +++ b/src/traits/fp.rs @@ -10,7 +10,7 @@ pub trait FPVector { fn reduce(&self, init: T, f: F) -> Self::Scalar where F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar, - T: Into; + T: Into + Copy; fn zip_with(&self, f: F, other: &Self) -> Self where F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar; @@ -47,7 +47,7 @@ pub trait FPMatrix { fn reduce(&self, init: T, f: F) -> f64 where F: Fn(f64, f64) -> f64, - T: Into; + T: Into + Copy + Clone; fn zip_with(&self, f: F, other: &Matrix) -> Matrix where F: Fn(f64, f64) -> f64; @@ -58,3 +58,42 @@ pub trait FPMatrix { where F: Fn(Vec) -> f64; } + +/// Functional Programming tools for Vector in Parallel (Uses Rayon crate) +pub trait ParallelFPVector { + type Scalar; + + fn par_fmap(&self, f: F) -> Self + where + F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync; + fn par_reduce(&self, init: T, f: F) -> Self::Scalar + where + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync, + T: Into + Send + Sync + Copy; + fn par_zip_with(&self, f: F, other: &Self) -> Self + where + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync; + fn par_filter(&self, f: F) -> Self + where + F: Fn(Self::Scalar) -> bool + Send + Sync; +} + +/// Functional Programming for Matrix in Parallel (Uses Rayon crate) +pub trait ParallelFPMatrix { + fn par_fmap(&self, f: F) -> Matrix + where + F: Fn(f64) -> f64 + Send + Sync; + fn par_reduce(&self, init: T, f: F) -> f64 + where + F: Fn(f64, f64) -> f64 + Send + Sync, + T: Into + Copy + Clone + Send + Sync; + fn par_zip_with(&self, f: F, other: &Matrix) -> Matrix + where + F: Fn(f64, f64) -> f64 + Send + Sync; + fn par_col_reduce(&self, f: F) -> Vec + where + F: Fn(Vec) -> f64 + Send + Sync; + fn par_row_reduce(&self, f: F) -> Vec + where + F: Fn(Vec) -> f64 + Send + Sync; +} diff --git a/src/traits/math.rs b/src/traits/math.rs index 228aad9e..2100fd1b 100644 --- a/src/traits/math.rs +++ b/src/traits/math.rs @@ -97,3 +97,36 @@ impl Normed for f64 { self / self.abs() } } + +// ============================================================================= +// Implementation for parallel traits +// ============================================================================= + +/// Mathematical Vector in Parallel +pub trait ParallelVector { + type Scalar; + fn par_add_vec(&self, rhs: &Self) -> Self; + fn par_sub_vec(&self, rhs: &Self) -> Self; + fn par_mul_scalar(&self, rhs: Self::Scalar) -> Self; +} + +/// Normed Vector in Parallel +pub trait ParallelNormed: Vector { + type UnsignedScalar; + fn par_norm(&self, kind: Norm) -> Self::UnsignedScalar; +} + +/// Inner product Vector in Parallel +pub trait ParallelInnerProduct: ParallelNormed { + fn par_dot(&self, rhs: &Self) -> Self::Scalar; +} + +/// Matrix Products in Parallel +pub trait ParallelMatrixProduct { + fn par_hadamard(&self, other: &Self) -> Matrix; +} + +/// Vector Products in Parallel +pub trait ParallelVectorProduct: Vector { + fn par_cross(&self, other: &Self) -> Self; +} diff --git a/src/traits/mod.rs b/src/traits/mod.rs index 92b224a6..2709f2db 100644 --- a/src/traits/mod.rs +++ b/src/traits/mod.rs @@ -1,3 +1,4 @@ +pub mod float; pub mod fp; pub mod general; pub mod math; @@ -6,4 +7,3 @@ pub mod num; pub mod pointer; pub mod stable; pub mod sugar; -pub mod float; diff --git 
a/src/traits/mutable.rs b/src/traits/mutable.rs index 7b0932fc..fccb56ce 100644 --- a/src/traits/mutable.rs +++ b/src/traits/mutable.rs @@ -16,3 +16,14 @@ pub trait MutMatrix { unsafe fn swap(&mut self, idx1: usize, idx2: usize, shape: Shape); unsafe fn swap_with_perm(&mut self, p: &Vec<(usize, usize)>, shape: Shape); } + +// Mutable trait for Vector in Parallel (Uses Rayon crate) +pub trait ParallelMutFP { + type Scalar; + fn par_mut_map(&mut self, f: F) + where + F: Fn(Self::Scalar) -> Self::Scalar + Send + Sync; + fn par_mut_zip_with(&mut self, f: F, other: &Self) + where + F: Fn(Self::Scalar, Self::Scalar) -> Self::Scalar + Send + Sync; +} diff --git a/src/traits/sugar.rs b/src/traits/sugar.rs index a51d9269..e1ce7f1e 100644 --- a/src/traits/sugar.rs +++ b/src/traits/sugar.rs @@ -1,15 +1,17 @@ -use crate::structure::matrix::{Matrix, Shape, matrix}; +use crate::structure::matrix::{matrix, Matrix, Shape}; use crate::traits::fp::FPVector; use crate::util::non_macro::zeros_shape; -use std::ops::{Add, Sub, Mul, Div}; +use std::ops::{Add, Div, Mul, Sub}; /// Syntactic sugar for Vector operations -pub trait VecOps: Sized + FPVector -where Self::Scalar: Copy + Clone - + Add - + Sub - + Mul - + Div +pub trait VecOps: Sized + FPVector +where + Self::Scalar: Copy + + Clone + + Add + + Sub + + Mul + + Div, { //type Scalar; fn add_v(&self, v: &Self) -> Self { @@ -449,7 +451,7 @@ impl ConvToMat for Vec { fn to_col(&self) -> Matrix { matrix(self.clone(), self.len(), 1, Shape::Col) } - + fn to_row(&self) -> Matrix { matrix(self.clone(), 1, self.len(), Shape::Row) }
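
> Taken together, a small end-to-end usage sketch of the parallel APIs this patch introduces (`par_norm`, `par_dot`, `par_fmap`, `par_mut_map`, and `Matrix::par_from_index`). The trait paths follow the modules touched above and are imported explicitly, on the assumption that they are not re-exported through `fuga`; the vector lengths simply mirror the benchmark sizes.

```rust
use peroxide::fuga::*;
use peroxide::traits::fp::ParallelFPVector;
use peroxide::traits::math::{ParallelInnerProduct, ParallelNormed};
use peroxide::traits::mutable::ParallelMutFP;

fn main() {
    // Vectors
    let v: Vec<f64> = (0..1_000_000).map(|i| i as f64).collect();
    let w: Vec<f64> = (0..1_000_000).map(|i| 2.0 * i as f64).collect();

    let l2 = v.par_norm(Norm::L2); // parallel L2 norm
    let ip = v.par_dot(&w); // parallel inner product
    let doubled = v.par_fmap(|x| 2.0 * x); // parallel map

    let mut u = w.clone();
    u.par_mut_map(|x| x.sqrt()); // in-place parallel map

    // Matrices
    let m = Matrix::par_from_index(|i, j| (i + j) as f64, (1000, 1000));
    let lpq = m.par_norm(Norm::Lpq(4.0, 2.0)); // parallel Lpq norm
    let frob = m.par_dot(&m); // parallel Frobenius inner product

    println!("{} {} {} {} {} {}", l2, ip, doubled.len(), u.len(), lpq, frob);
}
```

As the benchmark results above suggest, the parallel variants only pay off for heavier per-element work (e.g. `Lpq` norms); for cheap element-wise operations on this matrix size, the serial paths remain faster.
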