From 161d2499178329fe924e28f8d3439e5a87ab6379 Mon Sep 17 00:00:00 2001
From: Lorenzo
Date: Tue, 8 Nov 2022 15:22:34 +0000
Subject: [PATCH] Release 0.3 (#235)

---
 .github/CONTRIBUTING.md                  | 11 +++++++
 .github/DEVELOPERS.md                    |  5 ++-
 .gitignore                               |  4 ++-
 CHANGELOG.md                             | 27 ++++++++++------
 Cargo.toml                               | 30 ++++++++++--------
 LICENSE                                  |  2 +-
 README.md                                |  4 +--
 smartcore.svg                            |  2 +-
 src/algorithm/neighbour/cover_tree.rs    | 10 +++---
 src/cluster/kmeans.rs                    |  8 ++---
 src/dataset/mod.rs                       |  2 +-
 src/ensemble/mod.rs                      |  2 +-
 src/ensemble/random_forest_classifier.rs |  5 +--
 src/ensemble/random_forest_regressor.rs  |  3 --
 src/lib.rs                               | 39 ++++++++++++++++++------
 src/linear/linear_regression.rs          |  5 +--
 src/linear/logistic_regression.rs        |  2 +-
 src/linear/ridge_regression.rs           |  5 +--
 src/metrics/auc.rs                       |  2 +-
 src/metrics/mod.rs                       |  2 +-
 src/model_selection/mod.rs               |  2 +-
 src/neighbors/knn_classifier.rs          |  2 +-
 src/numbers/realnum.rs                   |  2 +-
 src/rand_custom.rs                       | 18 ++++++-----
 src/svm/mod.rs                           |  2 +-
 src/svm/svc.rs                           |  5 ++-
 src/svm/svr.rs                           |  2 --
 src/tree/decision_tree_classifier.rs     | 13 +++-----
 src/tree/decision_tree_regressor.rs      | 14 +++------
 src/tree/mod.rs                          |  2 +-
 30 files changed, 131 insertions(+), 101 deletions(-)

diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
index c09dfa7e..15b39063 100644
--- a/.github/CONTRIBUTING.md
+++ b/.github/CONTRIBUTING.md
@@ -26,6 +26,17 @@ Take a look to the conventions established by existing code:
 * Every module should provide comprehensive tests at the end, in its `mod tests {}` sub-module. These tests can be flagged or not with configuration flags to allow WebAssembly target.
 * Run `cargo doc --no-deps --open` and read the generated documentation in the browser to be sure that your changes reflects in the documentation and new code is documented.
 
+#### Digging deeper
+* A nice overview of the codebase is provided by the [static analyzer](https://mozilla.github.io/rust-code-analysis/metrics.html):
+```
+$ cargo install rust-code-analysis-cli
+# print metrics for every module
+$ rust-code-analysis-cli -m -O json -o . -p src/ --pr
+# print the full AST for a module
+$ rust-code-analysis-cli -p src/algorithm/neighbour/fastpair.rs --ls 22 --le 213 -d > ast.txt
+```
+* Find out more about what happens in your binary with [`twiggy`](https://rustwasm.github.io/twiggy/install.html). This needs a compiled binary, so create a brief `fn main() {}` that uses `smartcore`, then point `twiggy` at the resulting file.
+
 ## Issue Report Process
 
 1. Go to the project's issues.
diff --git a/.github/DEVELOPERS.md b/.github/DEVELOPERS.md
index 87c2506c..b3a647bc 100644
--- a/.github/DEVELOPERS.md
+++ b/.github/DEVELOPERS.md
@@ -1,4 +1,7 @@
-# Smartcore: Introduction to modules
+# smartcore: Introduction to modules
+
+Important source of information:
+* [Rust API guidelines](https://rust-lang.github.io/api-guidelines/about.html)
 
 ## Walkthrough: traits system and basic structures
 
diff --git a/.gitignore b/.gitignore
index 9c0651ce..0983a159 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,4 +26,6 @@ src.dot
 out.svg
 FlameGraph/
-out.stacks
\ No newline at end of file
+out.stacks
+*.json
+*.txt
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a9dda106..d1054327 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,22 +4,29 @@ All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
-## [Unreleased]
+## [0.3.0] - 2022-11-09
 
 ## Added
-- Seeds to multiple algorithims that depend on random number generation.
-- Added feature `js` to use WASM in browser
-- Drop `nalgebra-bindings` feature
-- Complete refactoring with *extensive API changes* that includes:
+- WARNING: Breaking changes!
+- Complete refactoring with **extensive API changes** that includes:
   * moving to a new traits system, less structs more traits
   * adapting all the modules to the new traits system
-  * moving towards Rust 2021, in particular the use of `dyn` and `as_ref`
-  * reorganization of the code base, trying to eliminate duplicates
+  * moving to Rust 2021, use of object-safe traits and `as_ref`
+  * reorganization of the code base, eliminating duplicates
+- implemented `readers` (needs the "serde" feature) for reading and writing CSV files, extensible to other formats
+- the default feature set is now Wasm-/WASI-first
 
-## BREAKING CHANGE
-- Added a new parameter to `train_test_split` to define the seed.
+## Changed
+- WARNING: Breaking changes!
+- Seeds to multiple algorithms that depend on random number generation
+- Added a new parameter to `train_test_split` to define the seed
+- changed the use of the "serde" feature
+
+## Dropped
+- WARNING: Breaking changes!
+- Drop `nalgebra-bindings` feature; `ndarray` is the only supported linear algebra crate
 
-## [0.2.1] - 2022-05-10
+## [0.2.1] - 2021-05-10
 
 ## Added
 - L2 regularization penalty to the Logistic Regression
diff --git a/Cargo.toml b/Cargo.toml
index 0a230832..4fb260bd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,9 @@
 [package]
 name = "smartcore"
-description = "The most advanced machine learning library in rust."
+description = "Machine Learning in Rust."
 homepage = "https://smartcorelib.org"
-version = "0.4.0"
-authors = ["SmartCore Developers"]
+version = "0.3.0"
+authors = ["smartcore Developers"]
 edition = "2021"
 license = "Apache-2.0"
 documentation = "https://docs.rs/smartcore"
@@ -11,6 +11,13 @@ repository = "https://github.com/smartcorelib/smartcore"
 readme = "README.md"
 keywords = ["machine-learning", "statistical", "ai", "optimization", "linear-algebra"]
 categories = ["science"]
+exclude = [
+    ".github",
+    ".gitignore",
+    "smartcore.iml",
+    "smartcore.svg",
+    "tests/"
+]
 
 [dependencies]
 approx = "0.5.1"
@@ -19,32 +26,31 @@ ndarray = { version = "0.15", optional = true }
 num-traits = "0.2.12"
 num = "0.4"
 rand = { version = "0.8.5", default-features = false, features = ["small_rng"] }
+getrandom = "*"
 rand_distr = { version = "0.4", optional = true }
 serde = { version = "1", features = ["derive"], optional = true }
 
 [features]
-default = ["serde", "datasets"]
+default = []
 serde = ["dep:serde"]
 ndarray-bindings = ["dep:ndarray"]
-datasets = ["dep:rand_distr", "std"]
-std = ["rand/std_rng", "rand/std"]
-# wasm32 only
+datasets = ["dep:rand_distr", "std_rand", "serde"]
+std_rand = ["rand/std_rng", "rand/std"]
+# used by wasm32-unknown-unknown for in-browser usage
 js = ["getrandom/js"]
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 getrandom = { version = "0.2", optional = true }
 
+[target.'cfg(all(target_arch = "wasm32", not(target_os = "wasi")))'.dev-dependencies]
+wasm-bindgen-test = "0.3"
+
 [dev-dependencies]
 itertools = "*"
-criterion = { version = "0.4", default-features = false }
 serde_json = "1.0"
 bincode = "1.3.1"
 
-[target.'cfg(all(target_arch = "wasm32", not(target_os = "wasi")))'.dev-dependencies]
-wasm-bindgen-test = "0.3"
-
 [workspace]
-resolver = "2"
 
 [profile.test]
 debug = 1
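The `[features]` rework above is the release's most visible change for downstream users: the crate now builds with an empty default feature set, so serialization, the bundled datasets, the std RNG and the `ndarray` bindings are all opt-in. A hypothetical consumer manifest restoring roughly the pre-0.3 behaviour (feature names taken from the `[features]` table above; the version pin is illustrative):

```toml
# Sketch of a downstream Cargo.toml, not an official recommendation.
[dependencies]
# "datasets" pulls in "std_rand" and "serde" transitively.
smartcore = { version = "0.3.0", features = ["serde", "datasets", "ndarray-bindings"] }
```

On `wasm32-unknown-unknown` the `js` feature routes `getrandom` through the browser's crypto API instead.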
diff --git a/LICENSE b/LICENSE
index 3cd57869..9448ceef 100644
--- a/LICENSE
+++ b/LICENSE
@@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier
 identification within third-party archives.
 
-   Copyright 2019-present at SmartCore developers (smartcorelib.org)
+   Copyright 2019-present at smartcore developers (smartcorelib.org)
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
diff --git a/README.md b/README.md
index fd6f4811..758a461f 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 
-  SmartCore
+  smartcore
 
@@ -18,4 +18,4 @@
 -----
 [![CI](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml/badge.svg)](https://github.com/smartcorelib/smartcore/actions/workflows/ci.yml)
 
-To start getting familiar with the new Smartcore v0.5 API, there is now available a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter). Please see instructions there, contributions welcome see [CONTRIBUTING](.github/CONTRIBUTING.md).
+To start getting familiar with the new smartcore v0.5 API, there is a [**Jupyter Notebook environment repository**](https://github.com/smartcorelib/smartcore-jupyter) available. Please see the instructions there; contributions are welcome, see [CONTRIBUTING](.github/CONTRIBUTING.md).
diff --git a/smartcore.svg b/smartcore.svg
index 3e4c68d1..eaffd58f 100644
--- a/smartcore.svg
+++ b/smartcore.svg
@@ -76,5 +76,5 @@
          y="81.876823"
          x="91.861809"
          id="tspan842"
-         sodipodi:role="line">SmartCore
+         sodipodi:role="line">smartcore
diff --git a/src/algorithm/neighbour/cover_tree.rs b/src/algorithm/neighbour/cover_tree.rs
index db062f9f..011a9cc0 100644
--- a/src/algorithm/neighbour/cover_tree.rs
+++ b/src/algorithm/neighbour/cover_tree.rs
@@ -64,7 +64,7 @@ struct Node {
     max_dist: f64,
     parent_dist: f64,
     children: Vec,
-    scale: i64,
+    _scale: i64,
 }
 
 #[derive(Debug)]
@@ -84,7 +84,7 @@ impl> CoverTree {
             max_dist: 0f64,
             parent_dist: 0f64,
             children: Vec::new(),
-            scale: 0,
+            _scale: 0,
         };
         let mut tree = CoverTree {
             base,
@@ -245,7 +245,7 @@ impl> CoverTree {
             max_dist: 0f64,
             parent_dist: 0f64,
             children: Vec::new(),
-            scale: 100,
+            _scale: 100,
         }
     }
 
@@ -306,7 +306,7 @@ impl> CoverTree {
             max_dist: 0f64,
             parent_dist: 0f64,
             children,
-            scale: 100,
+            _scale: 100,
         }
     } else {
         let mut far: Vec = Vec::new();
@@ -375,7 +375,7 @@ impl> CoverTree {
             max_dist: self.max(consumed_set),
             parent_dist: 0f64,
             children,
-            scale: (top_scale - max_scale),
+            _scale: (top_scale - max_scale),
         }
     }
 }
diff --git a/src/cluster/kmeans.rs b/src/cluster/kmeans.rs
index 9322d659..18f83085 100644
--- a/src/cluster/kmeans.rs
+++ b/src/cluster/kmeans.rs
@@ -11,7 +11,7 @@
 //! these re-calculated centroids becoming the new centers of their respective clusters. Next all instances of the training set are re-assigned to their closest cluster again.
 //! This iterative process continues until convergence is achieved and the clusters are considered settled.
 //!
-//! Initial choice of K data points is very important and has big effect on performance of the algorithm. SmartCore uses k-means++ algorithm to initialize cluster centers.
+//! The initial choice of K data points is very important and has a big effect on the performance of the algorithm. `smartcore` uses the k-means++ algorithm to initialize cluster centers.
 //!
 //! Example:
 //!
@@ -74,7 +74,7 @@ pub struct KMeans, Y: Array1> {
     k: usize,
     _y: Vec,
     size: Vec,
-    distortion: f64,
+    _distortion: f64,
     centroids: Vec>,
     _phantom_tx: PhantomData,
     _phantom_ty: PhantomData,
@@ -313,7 +313,7 @@ impl, Y: Array1> KMeans
         k: parameters.k,
         _y: y,
         size,
-        distortion,
+        _distortion: distortion,
         centroids,
         _phantom_tx: PhantomData,
         _phantom_ty: PhantomData,
@@ -470,7 +470,7 @@ mod tests {
         wasm_bindgen_test::wasm_bindgen_test
     )]
     #[test]
-    fn fit_predict_iris() {
+    fn fit_predict() {
         let x = DenseMatrix::from_2d_array(&[
             &[5.1, 3.5, 1.4, 0.2],
             &[4.9, 3.0, 1.4, 0.2],
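The renamed `fit_predict` test above also doubles as a reference for the refactored estimator API. As a standalone sketch (the `with_k` builder on `KMeansParameters` is assumed from the crate's parameter-builder convention, not verified against the final 0.3 API):

```rust
use smartcore::cluster::kmeans::*;
use smartcore::linalg::basic::matrix::DenseMatrix;

fn main() {
    // Two well-separated blobs.
    let x = DenseMatrix::from_2d_array(&[
        &[1.0, 2.0],
        &[1.1, 2.2],
        &[9.0, 9.0],
        &[9.1, 9.2],
    ]);
    // k-means++ picks the initial centroids (see the module docs above).
    let kmeans = KMeans::fit(&x, KMeansParameters::default().with_k(2)).unwrap();
    let labels: Vec<u8> = kmeans.predict(&x).unwrap();
    println!("{labels:?}");
}
```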
diff --git a/src/dataset/mod.rs b/src/dataset/mod.rs
index 5b32d02d..855b288e 100644
--- a/src/dataset/mod.rs
+++ b/src/dataset/mod.rs
@@ -1,6 +1,6 @@
 //! Datasets
 //!
-//! In this module you will find small datasets that are used in SmartCore mostly for demonstration purposes.
+//! In this module you will find small datasets that are used in `smartcore`, mostly for demonstration purposes.
 pub mod boston;
 pub mod breast_cancer;
 pub mod diabetes;
diff --git a/src/ensemble/mod.rs b/src/ensemble/mod.rs
index 1ddf4b47..8cebd5c5 100644
--- a/src/ensemble/mod.rs
+++ b/src/ensemble/mod.rs
@@ -7,7 +7,7 @@
 //! set and then aggregate their individual predictions to form a final prediction. In classification setting the overall prediction is the most commonly
 //! occurring majority class among the individual predictions.
 //!
-//! In SmartCore you will find implementation of RandomForest - a popular averaging algorithms based on randomized [decision trees](../tree/index.html).
+//! In `smartcore` you will find an implementation of Random Forest - a popular averaging algorithm based on randomized [decision trees](../tree/index.html).
 //! Random forests provide an improvement over bagged trees by way of a small tweak that decorrelates the trees. As in bagging, we build a number of
 //! decision trees on bootstrapped training samples. But when building these decision trees, each time a split in a tree is considered,
 //! a random sample of _m_ predictors is chosen as split candidates from the full set of _p_ predictors.
diff --git a/src/ensemble/random_forest_classifier.rs b/src/ensemble/random_forest_classifier.rs
index d01aceff..8ea174b5 100644
@@ -104,7 +104,6 @@ pub struct RandomForestClassifier< X: Array2, Y: Array1, > {
-    parameters: Option,
     trees: Option>>,
     classes: Option>,
     samples: Option>>,
@@ -198,7 +197,6 @@ impl, Y: {
     fn new() -> Self {
         Self {
-            parameters: Option::None,
             trees: Option::None,
             classes: Option::None,
             samples: Option::None,
@@ -501,7 +499,6 @@ impl, Y: Array1
diff --git a/src/ensemble/random_forest_regressor.rs b/src/ensemble/random_forest_regressor.rs
 pub struct RandomForestRegressor< X: Array2, Y: Array1, > {
-    parameters: Option,
     trees: Option>>,
     samples: Option>>,
 }
@@ -177,7 +176,6 @@ impl, Y: Array1 {
     fn new() -> Self {
         Self {
-            parameters: Option::None,
             trees: Option::None,
             samples: Option::None,
         }
@@ -434,7 +432,6 @@ impl, Y: Array1
 }
 Ok(RandomForestRegressor {
-    parameters: Some(parameters),
     trees: Some(trees),
     samples: maybe_all_samples,
 })
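With the `parameters` field gone, a fitted forest keeps only its trees and bootstrap samples; callers pass parameters to `fit` and keep them on their side if needed. A minimal sketch of that call shape, mirroring the fit/predict pattern used in the crate's doc examples (data and labels are illustrative):

```rust
use smartcore::ensemble::random_forest_classifier::RandomForestClassifier;
use smartcore::linalg::basic::matrix::DenseMatrix;

fn main() {
    let x = DenseMatrix::from_2d_array(&[
        &[5.1, 3.5, 1.4, 0.2],
        &[4.9, 3.0, 1.4, 0.2],
        &[7.0, 3.2, 4.7, 1.4],
        &[6.4, 3.2, 4.5, 1.5],
    ]);
    let y: Vec<u32> = vec![0, 0, 1, 1];
    // Parameters are consumed by `fit`; the returned model no longer stores them.
    let forest = RandomForestClassifier::fit(&x, &y, Default::default()).unwrap();
    println!("{:?}", forest.predict(&x).unwrap());
}
```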
diff --git a/src/lib.rs b/src/lib.rs
index a955de2c..03bfc03b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -8,25 +8,38 @@
 #![warn(missing_docs)]
 #![warn(rustdoc::missing_doc_code_examples)]
 
-//! # SmartCore
+//! # smartcore
 //!
-//! Welcome to SmartCore, machine learning in Rust!
+//! Welcome to `smartcore`, machine learning in Rust!
 //!
-//! SmartCore features various classification, regression and clustering algorithms including support vector machines, random forests, k-means and DBSCAN,
+//! `smartcore` features various classification, regression and clustering algorithms including support vector machines, random forests, k-means and DBSCAN,
 //! as well as tools for model selection and model evaluation.
 //!
-//! SmartCore provides its own traits system that extends Rust standard library, to deal with linear algebra and common
+//! `smartcore` provides its own traits system that extends the Rust standard library, to deal with linear algebra and common
 //! computational models. Its API is designed using well recognizable patterns. Extra features (like support for [ndarray](https://docs.rs/ndarray)
 //! structures) is available via optional features.
 //!
 //! ## Getting Started
 //!
-//! To start using SmartCore simply add the following to your Cargo.toml file:
+//! To start using the latest stable version of `smartcore`, simply add the following to your `Cargo.toml` file:
+//! ```ignore
+//! [dependencies]
+//! smartcore = "*"
+//! ```
+//!
+//! To use the development version of `smartcore`, with the latest unstable additions:
 //! ```ignore
 //! [dependencies]
 //! smartcore = { git = "https://github.com/smartcorelib/smartcore", branch = "development" }
 //! ```
 //!
+//! Different features can be added on top of the base library; for example, to include the sample datasets:
+//! ```ignore
+//! [dependencies]
+//! smartcore = { git = "https://github.com/smartcorelib/smartcore", features = ["datasets"] }
+//! ```
+//! Check `smartcore`'s `Cargo.toml` for the available features.
+//!
 //! ## Using Jupyter
 //! For quick introduction, Jupyter Notebooks are available [here](https://github.com/smartcorelib/smartcore-jupyter/tree/main/notebooks).
 //! You can set up a local environment to run Rust notebooks using [EVCXR](https://github.com/google/evcxr)
@@ -37,7 +50,7 @@
 //! For example, you can use this code to fit a [K Nearest Neighbors classifier](neighbors/knn_classifier/index.html) to a dataset that is defined as standard Rust vector:
 //!
 //! ```
-//! // DenseMatrix defenition
+//! // DenseMatrix definition
 //! use smartcore::linalg::basic::matrix::DenseMatrix;
 //! // KNNClassifier
 //! use smartcore::neighbors::knn_classifier::*;
@@ -62,7 +75,9 @@
 //! ```
 //!
 //! ## Overview
-//! All machine learning algorithms in SmartCore are grouped into these broad categories:
+//!
+//! ### Supported algorithms
+//! All machine learning algorithms are grouped into these broad categories:
 //! * [Clustering](cluster/index.html), unsupervised clustering of unlabeled data.
 //! * [Matrix Decomposition](decomposition/index.html), various methods for matrix decomposition.
 //! * [Linear Models](linear/index.html), regression and classification methods where output is assumed to have linear relation to explanatory variables
@@ -71,11 +86,14 @@
 //! * [Nearest Neighbors](neighbors/index.html), K Nearest Neighbors for classification and regression
 //! * [Naive Bayes](naive_bayes/index.html), statistical classification technique based on Bayes Theorem
 //! * [SVM](svm/index.html), support vector machines
+//!
+//! ### Linear Algebra traits system
+//! For an introduction to `smartcore`'s traits system see [this notebook](https://github.com/smartcorelib/smartcore-jupyter/blob/5523993c53c6ec1fd72eea130ef4e7883121c1ea/notebooks/01-A-little-bit-about-numbers.ipynb)
 
 /// Foundamental numbers traits
 pub mod numbers;
 
-/// Various algorithms and helper methods that are used elsewhere in SmartCore
+/// Various algorithms and helper methods that are used elsewhere in `smartcore`
 pub mod algorithm;
 
 pub mod api;
@@ -89,7 +107,7 @@ pub mod decomposition;
 /// Ensemble methods, including Random Forest classifier and regressor
 pub mod ensemble;
 pub mod error;
-/// Diverse collection of linear algebra abstractions and methods that power SmartCore algorithms
+/// Diverse collection of linear algebra abstractions and methods that power `smartcore` algorithms
 pub mod linalg;
 /// Supervised classification and regression models that assume linear relationship between dependent and explanatory variables.
 pub mod linear;
@@ -105,7 +123,8 @@ pub mod neighbors;
 pub mod optimization;
 /// Preprocessing utilities
 pub mod preprocessing;
-/// Reading in data from serialized foramts
+/// Reading in data from serialized formats
+#[cfg(feature = "serde")]
 pub mod readers;
 /// Support Vector Machines
 pub mod svm;
diff --git a/src/linear/linear_regression.rs b/src/linear/linear_regression.rs
index 1f7d5404..a5c76999 100644
--- a/src/linear/linear_regression.rs
+++ b/src/linear/linear_regression.rs
@@ -12,7 +12,7 @@
 //! \\[\hat{\beta} = (X^TX)^{-1}X^Ty \\]
 //!
 //! the \\((X^TX)^{-1}\\) term is both computationally expensive and numerically unstable. An alternative approach is to use a matrix decomposition to avoid this operation.
-//! SmartCore uses [SVD](../../linalg/svd/index.html) and [QR](../../linalg/qr/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\).
+//! `smartcore` uses [SVD](../../linalg/svd/index.html) and [QR](../../linalg/qr/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\).
 //! The QR decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly,
 //! but does not work for all data matrices. Unlike the QR decomposition, all matrices have an SVD decomposition.
 //!
@@ -113,7 +113,6 @@ pub struct LinearRegression< > {
     coefficients: Option,
     intercept: Option,
-    solver: LinearRegressionSolverName,
     _phantom_ty: PhantomData,
     _phantom_y: PhantomData,
 }
@@ -210,7 +209,6 @@ impl<
         Self {
             coefficients: Option::None,
             intercept: Option::None,
-            solver: LinearRegressionParameters::default().solver,
             _phantom_ty: PhantomData,
             _phantom_y: PhantomData,
         }
@@ -276,7 +274,6 @@ impl<
         Ok(LinearRegression {
             intercept: Some(*w.get((num_attributes, 0))),
             coefficients: Some(weights),
-            solver: parameters.solver,
             _phantom_ty: PhantomData,
             _phantom_y: PhantomData,
         })
diff --git a/src/linear/logistic_regression.rs b/src/linear/logistic_regression.rs
index 7dd269c2..8bf65bf0 100644
--- a/src/linear/logistic_regression.rs
+++ b/src/linear/logistic_regression.rs
@@ -5,7 +5,7 @@
 //!
 //! \\[ Pr(y=1) \approx \frac{e^{\beta_0 + \sum_{i=1}^n \beta_iX_i}}{1 + e^{\beta_0 + \sum_{i=1}^n \beta_iX_i}} \\]
 //!
-//! SmartCore uses [limited memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method to find estimates of regression coefficients, \\(\beta\\)
+//! `smartcore` uses the [limited-memory BFGS](https://en.wikipedia.org/wiki/Limited-memory_BFGS) method to find estimates of the regression coefficients, \\(\beta\\)
 //!
 //! Example:
 //!
diff --git a/src/linear/ridge_regression.rs b/src/linear/ridge_regression.rs
index 914afc2d..6bd5595b 100644
--- a/src/linear/ridge_regression.rs
+++ b/src/linear/ridge_regression.rs
@@ -12,7 +12,7 @@
 //! where \\(\alpha \geq 0\\) is a tuning parameter that controls strength of regularization. When \\(\alpha = 0\\) the penalty term has no effect, and ridge regression will produce the least squares estimates.
 //! However, as \\(\alpha \rightarrow \infty\\), the impact of the shrinkage penalty grows, and the ridge regression coefficient estimates will approach zero.
 //!
-//! SmartCore uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\).
+//! `smartcore` uses [SVD](../../linalg/svd/index.html) and [Cholesky](../../linalg/cholesky/index.html) matrix decomposition to find estimates of \\(\hat{\beta}\\).
 //! The Cholesky decomposition is more computationally efficient and more numerically stable than calculating the normal equation directly,
 //! but does not work for all data matrices. Unlike the Cholesky decomposition, all matrices have an SVD decomposition.
 //!
@@ -197,7 +197,6 @@ pub struct RidgeRegression< > {
     coefficients: Option,
     intercept: Option,
-    solver: Option,
     _phantom_ty: PhantomData,
     _phantom_y: PhantomData,
 }
@@ -259,7 +258,6 @@ impl<
         Self {
             coefficients: Option::None,
             intercept: Option::None,
-            solver: Option::None,
             _phantom_ty: PhantomData,
             _phantom_y: PhantomData,
         }
@@ -367,7 +365,6 @@ impl<
         Ok(RidgeRegression {
             intercept: Some(b),
             coefficients: Some(w),
-            solver: Some(parameters.solver),
             _phantom_ty: PhantomData,
             _phantom_y: PhantomData,
         })
diff --git a/src/metrics/auc.rs b/src/metrics/auc.rs
index ecaf646f..0a7ddf43 100644
--- a/src/metrics/auc.rs
+++ b/src/metrics/auc.rs
@@ -2,7 +2,7 @@
 //! Computes the area under the receiver operating characteristic (ROC) curve that is equal to the probability that a classifier will rank a
 //! randomly chosen positive instance higher than a randomly chosen negative one.
 //!
-//! SmartCore calculates ROC AUC from Wilcoxon or Mann-Whitney U test.
+//! `smartcore` calculates ROC AUC from the Wilcoxon (Mann-Whitney U) test.
 //!
 //! Example:
 //! ```
diff --git a/src/metrics/mod.rs b/src/metrics/mod.rs
index 06d44a16..c7e1be3d 100644
--- a/src/metrics/mod.rs
+++ b/src/metrics/mod.rs
@@ -4,7 +4,7 @@
 //! In a feedback loop you build your model first, then you get feedback from metrics, improve it and repeat until your model achieve desirable performance.
 //! Evaluation metrics helps to explain the performance of a model and compare models based on an objective criterion.
 //!
-//! Choosing the right metric is crucial while evaluating machine learning models. In SmartCore you will find metrics for these classes of ML models:
+//! Choosing the right metric is crucial when evaluating machine learning models. In `smartcore` you will find metrics for these classes of ML models:
 //!
 //! * [Classification metrics](struct.ClassificationMetrics.html)
 //! * [Regression metrics](struct.RegressionMetrics.html)
diff --git a/src/model_selection/mod.rs b/src/model_selection/mod.rs
index b8e4e7fc..222b9d72 100644
--- a/src/model_selection/mod.rs
+++ b/src/model_selection/mod.rs
@@ -7,7 +7,7 @@
 //! Splitting data into multiple subsets helps us to find the right combination of hyperparameters, estimate model performance and choose the right model for
 //! the data.
 //!
-//! In SmartCore a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function.
+//! In `smartcore` a random split into training and test sets can be quickly computed with the [train_test_split](./fn.train_test_split.html) helper function.
 //!
 //! ```
 //! use smartcore::linalg::basic::matrix::DenseMatrix;
diff --git a/src/neighbors/knn_classifier.rs b/src/neighbors/knn_classifier.rs
index 67d094a4..882ac556 100644
--- a/src/neighbors/knn_classifier.rs
+++ b/src/neighbors/knn_classifier.rs
@@ -1,6 +1,6 @@
 //! # K Nearest Neighbors Classifier
 //!
-//! SmartCore relies on 2 backend algorithms to speedup KNN queries:
+//! `smartcore` relies on 2 backend algorithms to speed up KNN queries:
 //! * [`LinearSearch`](../../algorithm/neighbour/linear_search/index.html)
 //! * [`CoverTree`](../../algorithm/neighbour/cover_tree/index.html)
 //!
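Two of the release's changes meet in this area: `train_test_split` gained an explicit seed argument, and the KNN classifier keeps its two search backends. A sketch of them together - the five-argument `(x, y, test_size, shuffle, seed)` signature is assumed from the changelog entry above, and the KNN defaults mirror the example in `src/lib.rs`:

```rust
use smartcore::linalg::basic::matrix::DenseMatrix;
use smartcore::model_selection::train_test_split;
use smartcore::neighbors::knn_classifier::*;

fn main() {
    let x = DenseMatrix::from_2d_array(&[
        &[5.1, 3.5, 1.4, 0.2],
        &[4.9, 3.0, 1.4, 0.2],
        &[7.0, 3.2, 4.7, 1.4],
        &[6.4, 3.2, 4.5, 1.5],
    ]);
    let y: Vec<u32> = vec![0, 0, 1, 1];
    // `Some(42)` pins the shuffle, so the split is identical on every run;
    // `None` falls back to an unseeded RNG.
    let (x_train, _x_test, y_train, _y_test) = train_test_split(&x, &y, 0.25, true, Some(42));
    let knn = KNNClassifier::fit(&x_train, &y_train, Default::default()).unwrap();
    println!("{:?}", knn.predict(&x_train).unwrap());
}
```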
diff --git a/src/numbers/realnum.rs b/src/numbers/realnum.rs
index 8c60e47b..f4d9aec1 100644
--- a/src/numbers/realnum.rs
+++ b/src/numbers/realnum.rs
@@ -1,5 +1,5 @@
 //! # Real Number
-//! Most algorithms in SmartCore rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ.
+//! Most algorithms in `smartcore` rely on basic linear algebra operations like dot product, matrix decomposition and other subroutines that are defined for a set of real numbers, ℝ.
 //! This module defines real number and some useful functions that are used in [Linear Algebra](../../linalg/index.html) module.
 
 use num_traits::Float;
diff --git a/src/rand_custom.rs b/src/rand_custom.rs
index 15f9e738..b22390ed 100644
--- a/src/rand_custom.rs
+++ b/src/rand_custom.rs
@@ -1,19 +1,23 @@
-#[cfg(not(feature = "std"))]
-pub(crate) use rand::rngs::SmallRng as RngImpl;
-#[cfg(feature = "std")]
-pub(crate) use rand::rngs::StdRng as RngImpl;
+#[cfg(not(feature = "std_rand"))]
+pub use rand::rngs::SmallRng as RngImpl;
+#[cfg(feature = "std_rand")]
+pub use rand::rngs::StdRng as RngImpl;
 use rand::SeedableRng;
 
-pub(crate) fn get_rng_impl(seed: Option) -> RngImpl {
+/// Custom switch for random functions
+pub fn get_rng_impl(seed: Option) -> RngImpl {
     match seed {
         Some(seed) => RngImpl::seed_from_u64(seed),
         None => {
             cfg_if::cfg_if! {
-                if #[cfg(feature = "std")] {
+                if #[cfg(feature = "std_rand")] {
                     use rand::RngCore;
                     RngImpl::seed_from_u64(rand::thread_rng().next_u64())
                 } else {
-                    panic!("seed number needed for non-std build");
+                    // no std_rand feature in this build, fall back to getrandom
+                    let mut buf = [0u8; 64];
+                    getrandom::getrandom(&mut buf).unwrap();
+                    RngImpl::seed_from_u64(buf[0] as u64)
                 }
             }
         }
diff --git a/src/svm/mod.rs b/src/svm/mod.rs
index a30fe876..ef0f0033 100644
--- a/src/svm/mod.rs
+++ b/src/svm/mod.rs
@@ -9,7 +9,7 @@
 //! SVM is memory efficient since it uses only a subset of training data to find a decision boundary. This subset is called support vectors.
 //!
 //! In SVM distance between a data point and the support vectors is defined by the kernel function.
-//! SmartCore supports multiple kernel functions but you can always define a new kernel function by implementing the `Kernel` trait. Not all functions can be a kernel.
+//! `smartcore` supports multiple kernel functions, but you can always define a new kernel function by implementing the `Kernel` trait. Not every function can be a kernel.
 //! Building a new kernel requires a good mathematical understanding of the [Mercer theorem](https://en.wikipedia.org/wiki/Mercer%27s_theorem)
 //! that gives necessary and sufficient condition for a function to be a kernel function.
 //!
diff --git a/src/svm/svc.rs b/src/svm/svc.rs
index 9cb140d7..74998f57 100644
--- a/src/svm/svc.rs
+++ b/src/svm/svc.rs
@@ -20,7 +20,7 @@
 //!
 //! Where \\( m \\) is a number of training samples, \\( y_i \\) is a label value (either 1 or -1) and \\(\langle\vec{w}, \vec{x}_i \rangle + b\\) is a decision boundary.
 //!
-//! To solve this optimization problem, SmartCore uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm).
+//! To solve this optimization problem, `smartcore` uses an [approximate SVM solver](https://leon.bottou.org/projects/lasvm).
 //! The optimizer reaches accuracies similar to that of a real SVM after performing two passes through the training examples. You can choose the number of passes
 //! through the data that the algorithm takes by changing the `epoch` parameter of the classifier.
 //!
@@ -934,8 +934,7 @@ mod tests {
     use super::*;
     use crate::linalg::basic::matrix::DenseMatrix;
     use crate::metrics::accuracy;
-    #[cfg(feature = "serde")]
-    use crate::svm::*;
+    use crate::svm::Kernels;
 
     #[cfg_attr(
         all(target_arch = "wasm32", not(target_os = "wasi")),
diff --git a/src/svm/svr.rs b/src/svm/svr.rs
index 7a39a56b..8d49525b 100644
--- a/src/svm/svr.rs
+++ b/src/svm/svr.rs
@@ -596,7 +596,6 @@ mod tests {
     use super::*;
     use crate::linalg::basic::matrix::DenseMatrix;
     use crate::metrics::mean_squared_error;
-    #[cfg(feature = "serde")]
     use crate::svm::Kernels;
 
     // #[test]
@@ -617,7 +616,6 @@
     //     assert!(iter.next().is_none());
     // }
 
-    //TODO: had to disable this test as it runs for too long
     #[cfg_attr(
         all(target_arch = "wasm32", not(target_os = "wasi")),
         wasm_bindgen_test::wasm_bindgen_test
diff --git a/src/tree/decision_tree_classifier.rs b/src/tree/decision_tree_classifier.rs
index 6341ab4f..cbce14e0 100644
--- a/src/tree/decision_tree_classifier.rs
+++ b/src/tree/decision_tree_classifier.rs
@@ -163,7 +163,6 @@ impl Default for SplitCriterion {
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 struct Node {
-    index: usize,
     output: usize,
     split_feature: usize,
     split_value: Option,
@@ -406,9 +405,8 @@ impl Default for DecisionTreeClassifierSearchParameters {
 }
 
 impl Node {
-    fn new(index: usize, output: usize) -> Self {
+    fn new(output: usize) -> Self {
         Node {
-            index,
             output,
             split_feature: 0,
             split_value: Option::None,
@@ -582,7 +580,7 @@ impl, Y: Array1>
             count[yi[i]] += samples[i];
         }
 
-        let root = Node::new(0, which_max(&count));
+        let root = Node::new(which_max(&count));
         change_nodes.push(root);
 
         let mut order: Vec> = Vec::new();
@@ -831,11 +829,9 @@ impl, Y: Array1>
 
         let true_child_idx = self.nodes().len();
-        self.nodes
-            .push(Node::new(true_child_idx, visitor.true_child_output));
+        self.nodes.push(Node::new(visitor.true_child_output));
         let false_child_idx = self.nodes().len();
-        self.nodes
-            .push(Node::new(false_child_idx, visitor.false_child_output));
+        self.nodes.push(Node::new(visitor.false_child_output));
 
         self.nodes[visitor.node].true_child = Some(true_child_idx);
         self.nodes[visitor.node].false_child = Some(false_child_idx);
@@ -923,6 +919,7 @@ mod tests {
         wasm_bindgen_test::wasm_bindgen_test
     )]
     #[test]
+    #[cfg(feature = "datasets")]
     fn fit_predict_iris() {
         let x: DenseMatrix = DenseMatrix::from_2d_array(&[
             &[5.1, 3.5, 1.4, 0.2],
diff --git a/src/tree/decision_tree_regressor.rs b/src/tree/decision_tree_regressor.rs
index 12ea9781..0146cbc5 100644
--- a/src/tree/decision_tree_regressor.rs
+++ b/src/tree/decision_tree_regressor.rs
@@ -11,7 +11,7 @@
 //!
 //! where \\(\hat{y}_{Rk}\\) is the mean response for the training observations withing region _k_.
 //!
-//! SmartCore uses recursive binary splitting approach to build \\(R_1, R_2, ..., R_K\\) regions. The approach begins at the top of the tree and then successively splits the predictor space
+//! `smartcore` uses a recursive binary splitting approach to build the \\(R_1, R_2, ..., R_K\\) regions. The approach begins at the top of the tree and then successively splits the predictor space
 //! one predictor at a time. At each step of the tree-building process, the best split is made at that particular step, rather than looking ahead and picking a split that will lead to a better
 //! tree in some future step.
 //!
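The leaf prediction rule described above (mean response per region) is all a caller sees of the tree internals. A minimal fit/predict sketch in the same shape as the other 0.3 estimators (data is illustrative):

```rust
use smartcore::linalg::basic::matrix::DenseMatrix;
use smartcore::tree::decision_tree_regressor::DecisionTreeRegressor;

fn main() {
    let x = DenseMatrix::from_2d_array(&[
        &[1.0, 1.0],
        &[2.0, 1.0],
        &[3.0, 2.0],
        &[4.0, 2.0],
    ]);
    let y: Vec<f64> = vec![1.0, 1.5, 3.0, 3.5];
    // Each leaf predicts the mean of the training responses in its region.
    let tree = DecisionTreeRegressor::fit(&x, &y, Default::default()).unwrap();
    println!("{:?}", tree.predict(&x).unwrap());
}
```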
@@ -128,7 +128,6 @@ impl, Y: Array1>
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 #[derive(Debug, Clone)]
 struct Node {
-    index: usize,
     output: f64,
     split_feature: usize,
     split_value: Option,
@@ -299,9 +298,8 @@ impl Default for DecisionTreeRegressorSearchParameters {
 }
 
 impl Node {
-    fn new(index: usize, output: f64) -> Self {
+    fn new(output: f64) -> Self {
         Node {
-            index,
             output,
             split_feature: 0,
             split_value: Option::None,
@@ -450,7 +448,7 @@ impl, Y: Array1>
             sum += *sample_i as f64 * y_m.get(i).to_f64().unwrap();
         }
 
-        let root = Node::new(0, sum / (n as f64));
+        let root = Node::new(sum / (n as f64));
         nodes.push(root);
 
         let mut order: Vec> = Vec::new();
@@ -662,11 +660,9 @@ impl, Y: Array1>
 
         let true_child_idx = self.nodes().len();
-        self.nodes
-            .push(Node::new(true_child_idx, visitor.true_child_output));
+        self.nodes.push(Node::new(visitor.true_child_output));
         let false_child_idx = self.nodes().len();
-        self.nodes
-            .push(Node::new(false_child_idx, visitor.false_child_output));
+        self.nodes.push(Node::new(visitor.false_child_output));
 
         self.nodes[visitor.node].true_child = Some(true_child_idx);
         self.nodes[visitor.node].false_child = Some(false_child_idx);
diff --git a/src/tree/mod.rs b/src/tree/mod.rs
index 700dc76c..340b0a8e 100644
--- a/src/tree/mod.rs
+++ b/src/tree/mod.rs
@@ -9,7 +9,7 @@
 //! Decision trees suffer from high variance and often does not deliver best prediction accuracy when compared to other supervised learning approaches, such as linear and logistic regression.
 //! Hence some techniques such as [Random Forests](../ensemble/index.html) use more than one decision tree to improve performance of the algorithm.
 //!
-//! SmartCore uses [CART](https://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees_.28CART.29) learning technique to build both classification and regression trees.
+//! `smartcore` uses the [CART](https://en.wikipedia.org/wiki/Predictive_analytics#Classification_and_regression_trees_.28CART.29) learning technique to build both classification and regression trees.
 //!
 //! ## References:
 //!
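A pattern worth noting across the tree hunks: the `index` field was removed from `Node` because a node's identity is simply its position in the arena `Vec`, read off `nodes.len()` immediately before the push. A stripped-down, hypothetical mirror of that invariant (not the crate's actual types):

```rust
// Arena-style tree: a node is identified by its position in `nodes`,
// so storing an `index` field inside the node would be redundant.
struct Node {
    output: f64,
    true_child: Option<usize>,
    false_child: Option<usize>,
}

struct Tree {
    nodes: Vec<Node>,
}

impl Tree {
    fn leaf(output: f64) -> Node {
        Node { output, true_child: None, false_child: None }
    }

    fn split(&mut self, parent: usize, true_output: f64, false_output: f64) {
        // The next push lands at `nodes.len()`, which is the child's index.
        let true_child_idx = self.nodes.len();
        self.nodes.push(Self::leaf(true_output));
        let false_child_idx = self.nodes.len();
        self.nodes.push(Self::leaf(false_output));
        self.nodes[parent].true_child = Some(true_child_idx);
        self.nodes[parent].false_child = Some(false_child_idx);
    }
}

fn main() {
    let mut tree = Tree { nodes: vec![Tree::leaf(0.5)] };
    tree.split(0, 0.0, 1.0);
    assert_eq!(tree.nodes[0].true_child, Some(1));
    assert_eq!(tree.nodes[0].false_child, Some(2));
}
```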