From e8f5ed2e88f993b871abbe9f4f99aa080a1cfbd3 Mon Sep 17 00:00:00 2001 From: Andrew DalPino Date: Thu, 3 Sep 2020 04:58:14 -0500 Subject: [PATCH] Prepare for 0.1.0-beta release --- CHANGELOG.md | 10 +++++-- composer.json | 2 +- docs/kernels/distance/gower.md | 6 +--- docs/transformers/bm25-transformer.md | 2 +- docs/transformers/delta-tf-idf-transformer.md | 30 +++++++++++++++++++ docs/transformers/lambda-function.md | 2 +- src/Kernels/Distance/Gower.php | 2 +- src/ModelOrchestra.php | 2 +- src/NeuralNet/ActivationFunctions/ISRLU.php | 2 +- src/NeuralNet/ActivationFunctions/ISRU.php | 2 +- src/NeuralNet/Layers/AlphaDropout.php | 2 +- src/Transformers/DeltaTfIdfTransformer.php | 12 +++++++- 12 files changed, 58 insertions(+), 16 deletions(-) create mode 100644 docs/transformers/delta-tf-idf-transformer.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 37da8dd..e73132e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,10 @@ -- Unreleased +- 0.1.0-beta + - Add Recursive Feature Eliminator feature selector - Implement BM25 TF-IDF Transformer + - Add Delta TF-IDF Transformer - Added Lambda function Transformer - - All objects implement Stringable interface \ No newline at end of file + - All objects implement Stringable interface + - Added Gower nan-safe distance kernel + - Added ISRU and ISRLU activation functions + - Added Alpha Dropout hidden layer + - Added Model Orchestra meta-estimator \ No newline at end of file diff --git a/composer.json b/composer.json index aac8317..0d066a9 100644 --- a/composer.json +++ b/composer.json @@ -16,7 +16,7 @@ ], "require": { "php": ">=7.2", - "rubix/ml": "^0.1.0-rc3", + "rubix/ml": "^0.1.0", "rubix/tensor": "^2.0.4", "wamania/php-stemmer": "^2.0" }, diff --git a/docs/kernels/distance/gower.md b/docs/kernels/distance/gower.md index 8e7d188..9c33d08 100644 --- a/docs/kernels/distance/gower.md +++ b/docs/kernels/distance/gower.md @@ -16,11 +16,7 @@ A robust distance kernel that measures a mix of categorical and continuous data ```php use Rubix\ML\Kernels\Distance\Gower; -$kernel = new Gower(); // Continuous features between 0 and 1 - -$kernel = new Gower(2.0); // Between -1 and 1 - -$kernel = new Gower(1000.0); // Between 0 and 1000 +$kernel = new Gower(2.0); ``` ### References diff --git a/docs/transformers/bm25-transformer.md b/docs/transformers/bm25-transformer.md index 285f1ab..5c522e2 100644 --- a/docs/transformers/bm25-transformer.md +++ b/docs/transformers/bm25-transformer.md @@ -1,4 +1,4 @@ -[source] +[source] # BM25 Transformer BM25 is a term frequency weighting scheme that takes term frequency (TF) saturation and document length into account. diff --git a/docs/transformers/delta-tf-idf-transformer.md b/docs/transformers/delta-tf-idf-transformer.md new file mode 100644 index 0000000..63f506c --- /dev/null +++ b/docs/transformers/delta-tf-idf-transformer.md @@ -0,0 +1,30 @@ +[source] + +# Delta TF-IDF Transformer +A supervised TF-IDF (Term Frequency Inverse Document Frequency) Transformer that uses class labels to boost the TF-IDFs of terms by how informative they are. Terms that receive the highest boost are those whose concentration is primarily in one class whereas low weighted terms are more evenly distributed among the classes. + +> **Note:** This transformer assumes that its input is made up of word frequency vectors such as those produced by [Word Count Vectorizer](word-count-vectorizer.md). + +**Interfaces:** [Transformer](api.md#transformer), [Stateful](api.md#stateful), [Elastic](api.md#elastic) + +**Data Type Compatibility:** Continuous only + +## Parameters +This transformer does not have any parameters. + +## Example +```php +use Rubix\ML\Transformers\DeltaTfIdfTransformer; + +$transformer = new DeltaTfIdfTransformer(); +``` + +## Additional Methods +Return the document frequencies calculated during fitting: +```php +public dfs() : ?array +``` + +### References +>- J. Martineau et al. (2009). Delta TFIDF: An Improved Feature Space for Sentiment Analysis. +>- S. Ghosh et al. (2018). Class Specific TF-IDF Boosting for Short-text Classification. \ No newline at end of file diff --git a/docs/transformers/lambda-function.md b/docs/transformers/lambda-function.md index 69d5ec9..2e45b21 100644 --- a/docs/transformers/lambda-function.md +++ b/docs/transformers/lambda-function.md @@ -1,4 +1,4 @@ -[source] +[source] # Lambda Function Run a stateless lambda function (*anonymous* function) over the samples. The lambda function receives the sample matrix as an argument and should return the transformed matrix. diff --git a/src/Kernels/Distance/Gower.php b/src/Kernels/Distance/Gower.php index 93233ec..89bdb2a 100644 --- a/src/Kernels/Distance/Gower.php +++ b/src/Kernels/Distance/Gower.php @@ -117,6 +117,6 @@ public function compute(array $a, array $b) : float */ public function __toString() : string { - return "Gower {range: {$this->range}}"; + return "Gower (range: {$this->range})"; } } diff --git a/src/ModelOrchestra.php b/src/ModelOrchestra.php index f1d5547..4126ac0 100644 --- a/src/ModelOrchestra.php +++ b/src/ModelOrchestra.php @@ -384,6 +384,6 @@ protected function extractRegressor(Dataset $dataset) : array */ public function __toString() : string { - return 'Model Orchestra {' . Params::stringify($this->params()) . '}'; + return 'Model Orchestra (' . Params::stringify($this->params()) . ')'; } } diff --git a/src/NeuralNet/ActivationFunctions/ISRLU.php b/src/NeuralNet/ActivationFunctions/ISRLU.php index f209202..348ab42 100644 --- a/src/NeuralNet/ActivationFunctions/ISRLU.php +++ b/src/NeuralNet/ActivationFunctions/ISRLU.php @@ -95,6 +95,6 @@ public function differentiate(Matrix $z, Matrix $computed) : Matrix */ public function __toString() : string { - return "ISRLU {alpha: {$this->alpha}}"; + return "ISRLU (alpha: {$this->alpha})"; } } diff --git a/src/NeuralNet/ActivationFunctions/ISRU.php b/src/NeuralNet/ActivationFunctions/ISRU.php index 75c603a..bd5c7e0 100644 --- a/src/NeuralNet/ActivationFunctions/ISRU.php +++ b/src/NeuralNet/ActivationFunctions/ISRU.php @@ -95,6 +95,6 @@ public function differentiate(Matrix $z, Matrix $computed) : Matrix */ public function __toString() : string { - return "ISRU {alpha: {$this->alpha}}"; + return "ISRU (alpha: {$this->alpha})"; } } diff --git a/src/NeuralNet/Layers/AlphaDropout.php b/src/NeuralNet/Layers/AlphaDropout.php index 71ee938..77d9bc9 100644 --- a/src/NeuralNet/Layers/AlphaDropout.php +++ b/src/NeuralNet/Layers/AlphaDropout.php @@ -200,6 +200,6 @@ public function saturate(int $value) : float */ public function __toString() : string { - return "Alpha Dropout {ratio: {$this->ratio}}"; + return "Alpha Dropout (ratio: {$this->ratio})"; } } diff --git a/src/Transformers/DeltaTfIdfTransformer.php b/src/Transformers/DeltaTfIdfTransformer.php index e0f2b2d..56e22a5 100644 --- a/src/Transformers/DeltaTfIdfTransformer.php +++ b/src/Transformers/DeltaTfIdfTransformer.php @@ -32,7 +32,7 @@ * @package Rubix/ML * @author Andrew DalPino */ -class DeltaTfIdfTransformer implements Elastic +class DeltaTfIdfTransformer implements Transformer, Stateful, Elastic { /** * The class specific term frequencies of each word i.e. the number of @@ -102,6 +102,16 @@ public function fitted() : bool return $this->idfs and $this->entropies; } + /** + * Return the document frequencies calculated during fitting. + * + * @return int[]|null + */ + public function dfs() : ?array + { + return $this->dfs; + } + /** * Fit the transformer to the dataset. *