-
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added Laplace smoothing to Delta TF-IDF Transformer
- Loading branch information
1 parent
18dab62
commit 769c3e7
Showing
14 changed files
with
157 additions
and
381 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
<?php | ||
|
||
namespace Rubix\ML\Benchmarks\Transformers; | ||
|
||
use Tensor\Matrix; | ||
use Tensor\Vector; | ||
use Rubix\ML\Datasets\Labeled; | ||
use Rubix\ML\Transformers\DeltaTfIdfTransformer; | ||
|
||
/**
 * Benchmark that times applying a DeltaTfIdfTransformer to a randomly
 * generated labeled dataset.
 *
 * @Groups({"Transformers"})
 * @BeforeMethods({"setUp"})
 */
class DeltaTfIdfTransformerBench
{
    protected const DATASET_SIZE = 10000;

    /**
     * The randomly generated dataset that the transformer is applied to.
     *
     * @var \Rubix\ML\Datasets\Labeled
     */
    public $dataset;

    /**
     * The transformer under benchmark.
     *
     * @var \Rubix\ML\Transformers\DeltaTfIdfTransformer
     */
    protected $transformer;

    /**
     * Build a fresh dataset and transformer before the subject runs.
     */
    public function setUp() : void
    {
        $rows = self::DATASET_SIZE;

        // Elementwise comparison of random values against 0.8; the result is
        // multiplied into the features below (presumably a 0/1 mask that
        // zeroes out most entries - confirm against the Tensor docs).
        $mask = Matrix::rand($rows, 4)->greater(0.8);

        $features = Matrix::gaussian($rows, 4)->multiply($mask);

        // Labels come from thresholding random values at 0.5.
        $targets = Vector::rand($rows)->greater(0.5);

        $this->dataset = Labeled::quick(
            $features->asArray(),
            $targets->asArray()
        );

        $this->transformer = new DeltaTfIdfTransformer(1.0);
    }

    /**
     * Apply the transformer to the dataset built in setUp().
     *
     * @Subject
     * @Iterations(3)
     * @OutputTimeUnit("milliseconds", precision=3)
     */
    public function apply() : void
    {
        $this->dataset->apply($this->transformer);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
49 changes: 0 additions & 49 deletions
49
benchmarks/Transformers/RecursiveFeatureEliminatorBench.php
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
<?php | ||
|
||
namespace Rubix\ML\Benchmarks\Transformers; | ||
|
||
use Rubix\ML\Datasets\Unlabeled; | ||
use Rubix\ML\Transformers\TokenHashingVectorizer; | ||
|
||
/**
 * Benchmark that times applying a TokenHashingVectorizer to a dataset of
 * shuffled text samples.
 *
 * @Groups({"Transformers"})
 * @BeforeMethods({"setUp"})
 */
class TokenHashingVectorizerBench
{
    protected const DATASET_SIZE = 10000;

    protected const SAMPLE_TEXT = 'Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec at nisl posuere, luctus sapien vel, maximus ex. Curabitur tincidunt, libero at commodo tempor, magna neque malesuada diam, vel blandit metus velit quis magna. Vestibulum auctor libero quam, eu ullamcorper nulla dapibus a. Mauris id ultricies sapien. Integer consequat mi eget vehicula vulputate. Mauris cursus nisi non semper dictum. Quisque luctus ex in tortor laoreet tincidunt. Vestibulum imperdiet purus sit amet sapien dignissim elementum. Mauris tincidunt eget ex eu laoreet. Etiam efficitur quam at purus sagittis hendrerit. Mauris tempus, sem in pulvinar imperdiet, lectus ipsum molestie ante, id semper nunc est sit amet sem. Nulla at justo eleifend, gravida neque eu, consequat arcu. Vivamus bibendum eleifend metus, id elementum orci aliquet ac. Praesent pellentesque nisi vitae tincidunt eleifend. Pellentesque quis ex et lorem laoreet hendrerit ut ac lorem. Aliquam non sagittis est.';

    /**
     * The dataset of single-column text samples that the transformer is applied to.
     *
     * @var \Rubix\ML\Datasets\Unlabeled
     */
    public $dataset;

    /**
     * The transformer under benchmark.
     *
     * @var \Rubix\ML\Transformers\TokenHashingVectorizer
     */
    protected $transformer;

    /**
     * Build a fresh dataset and transformer before the subject runs.
     */
    public function setUp() : void
    {
        $samples = [];

        for ($i = 0; $i < self::DATASET_SIZE; ++$i) {
            // str_shuffle() returns the shuffled copy and leaves its argument
            // untouched, so the return value must be the one stored; otherwise
            // every sample would be the identical, unshuffled text.
            $samples[] = [str_shuffle(self::SAMPLE_TEXT)];
        }

        $this->dataset = Unlabeled::quick($samples);

        $this->transformer = new TokenHashingVectorizer(1000);
    }

    /**
     * Apply the transformer to the dataset built in setUp().
     *
     * @Subject
     * @Iterations(3)
     * @OutputTimeUnit("milliseconds", precision=3)
     */
    public function apply() : void
    {
        $this->dataset->apply($this->transformer);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.