From 3dac78179e9ab73eb6110d0b78cb07825e058e46 Mon Sep 17 00:00:00 2001 From: Lucas Carlson Date: Wed, 31 Jul 2024 07:40:52 -0700 Subject: [PATCH 1/8] Fix word count calculation when removing category in Bayes classifier - Move total word count reduction before category deletion - Ensure accurate word count maintenance during category removal --- lib/classifier/bayes.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/classifier/bayes.rb b/lib/classifier/bayes.rb index 4845213..5175ef1 100644 --- a/lib/classifier/bayes.rb +++ b/lib/classifier/bayes.rb @@ -152,10 +152,11 @@ def remove_category(category) category = category.prepare_category_name raise StandardError, "No such category: #{category}" unless @categories.key?(category) + @total_words -= @category_word_count[category].to_i + @categories.delete(category) @category_counts.delete(category) @category_word_count.delete(category) - @total_words -= @category_word_count[category].to_i end end end From 6b53130c0a684ca92ac0b0f246cd7d966a38f3e8 Mon Sep 17 00:00:00 2001 From: Lucas Carlson Date: Wed, 31 Jul 2024 07:45:13 -0700 Subject: [PATCH 2/8] add remove_category tests --- Gemfile.lock | 2 +- test/bayes/bayesian_test.rb | 30 ++++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index a00a57d..daf06f5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - classifier (1.4.0) + classifier (1.4.1) fast-stemmer (~> 1.0.0) rake diff --git a/test/bayes/bayesian_test.rb b/test/bayes/bayesian_test.rb index b337032..171d5c8 100644 --- a/test/bayes/bayesian_test.rb +++ b/test/bayes/bayesian_test.rb @@ -94,4 +94,34 @@ def test_remove_category_preserves_other_category_data assert_equal interesting_classification, @classifier.classify('This is interesting') end + + def test_remove_category + initial_total_words = @classifier.instance_variable_get(:@total_words) + category_word_count = @classifier.instance_variable_get(:@category_word_count)['Interesting'] + + @classifier.remove_category('Interesting') + + assert_nil @classifier.instance_variable_get(:@categories)['Interesting'] + assert_equal @classifier.instance_variable_get(:@category_counts)['Interesting'], 0 + assert_equal @classifier.instance_variable_get(:@category_word_count)['Interesting'], 0 + + new_total_words = @classifier.instance_variable_get(:@total_words) + assert_equal initial_total_words - category_word_count, new_total_words + end + + def test_remove_category_updates_total_words_before_deletion + initial_total_words = @classifier.instance_variable_get(:@total_words) + category_word_count = @classifier.instance_variable_get(:@category_word_count)['Interesting'] + + @classifier.remove_category('Interesting') + + new_total_words = @classifier.instance_variable_get(:@total_words) + assert_equal initial_total_words - category_word_count, new_total_words + end + + def test_remove_nonexistent_category + assert_raises(StandardError, 'No such category: Nonexistent Category') do + @classifier.remove_category('Nonexistent Category') + end + end end From 5bf5f1c277393fd476f1b3539167f80816828057 Mon Sep 17 00:00:00 2001 From: Lucas Carlson Date: Wed, 31 Jul 2024 07:45:45 -0700 Subject: [PATCH 3/8] Bump classifier gem version to 1.4.2 --- classifier.gemspec | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classifier.gemspec b/classifier.gemspec index acdc60e..52e58fd 100644 --- a/classifier.gemspec +++ b/classifier.gemspec @@ -1,6 +1,6 @@ Gem::Specification.new do |s| s.name = 'classifier' - s.version = '1.4.1' + s.version = '1.4.2' s.summary = 'A general classifier module to allow Bayesian and other types of classifications.' s.description = 'A general classifier module to allow Bayesian and other types of classifications.' s.author = 'Lucas Carlson' From 1e5b24e259cd9b58d4ef32759d8d9e7d14bd5d8b Mon Sep 17 00:00:00 2001 From: Lucas Carlson Date: Wed, 31 Jul 2024 07:46:01 -0700 Subject: [PATCH 4/8] Update Ruby CI workflow - Reduce tested Ruby versions to 2.7 and head --- .github/workflows/ruby.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index df1ffab..f80d4a1 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -22,7 +22,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - ruby-version: ['2.7', '3.0', '3.1', '3.2', 'head'] + ruby-version: ['2.7', 'head'] steps: - uses: actions/checkout@v4 From aa67f7e5561b6330e39e7b5d7ef79e26070dc195 Mon Sep 17 00:00:00 2001 From: Lucas Carlson Date: Wed, 31 Jul 2024 07:47:22 -0700 Subject: [PATCH 5/8] Bump classifier gem version to 1.4.2 - Update classifier gem from version 1.4.1 to 1.4.2 in Gemfile.lock --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index daf06f5..f366807 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . specs: - classifier (1.4.1) + classifier (1.4.2) fast-stemmer (~> 1.0.0) rake From ed07d6c721c211fcd8dbddf060d583f7a08f60e6 Mon Sep 17 00:00:00 2001 From: Lucas Carlson Date: Wed, 31 Jul 2024 07:52:26 -0700 Subject: [PATCH 6/8] Refactor Bayesian classifier tests - Remove test for nonexistent category removal - Rename and update test for category removal - Add mutex_m requirement in test helper --- test/bayes/bayesian_test.rb | 8 +------- test/test_helper.rb | 1 + 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/test/bayes/bayesian_test.rb b/test/bayes/bayesian_test.rb index 171d5c8..35148d3 100644 --- a/test/bayes/bayesian_test.rb +++ b/test/bayes/bayesian_test.rb @@ -54,12 +54,6 @@ def test_remove_category assert_equal ['Interesting'], @classifier.categories end - def test_remove_nonexistent_category - assert_raises(StandardError) do - @classifier.remove_category 'NonexistentCategory' - end - end - def test_remove_category_affects_classification @classifier.train_interesting 'This is interesting content' @classifier.train_uninteresting 'This is uninteresting content' @@ -95,7 +89,7 @@ def test_remove_category_preserves_other_category_data assert_equal interesting_classification, @classifier.classify('This is interesting') end - def test_remove_category + def test_remove_category_check_counts initial_total_words = @classifier.instance_variable_get(:@total_words) category_word_count = @classifier.instance_variable_get(:@category_word_count)['Interesting'] diff --git a/test/test_helper.rb b/test/test_helper.rb index b5e6f1d..4808dc5 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,5 +1,6 @@ $:.unshift(File.dirname(__FILE__) + '/../lib') +require 'mutex_m' require 'minitest' require 'minitest/autorun' require 'classifier' From daeb29d6a04cbcdc5f092d66049ce2fe6709c014 Mon Sep 17 00:00:00 2001 From: Lucas Carlson Date: Wed, 31 Jul 2024 07:55:02 -0700 Subject: [PATCH 7/8] Add mutex_m dependency and update fast-stemmer version - Add mutex_m gem to Gemfile and gemspec - Update fast-stemmer dependency version in gemspec - Remove explicit require of mutex_m in test_helper.rb --- Gemfile | 1 + Gemfile.lock | 2 ++ classifier.gemspec | 3 ++- test/test_helper.rb | 1 - 4 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Gemfile b/Gemfile index b44b21c..a23fa63 100644 --- a/Gemfile +++ b/Gemfile @@ -3,3 +3,4 @@ gemspec gem 'fast-stemmer' gem 'matrix' +gem 'mutex_m' diff --git a/Gemfile.lock b/Gemfile.lock index f366807..85692e8 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -11,6 +11,7 @@ GEM fast-stemmer (1.0.2) matrix (0.4.2) minitest (5.18.1) + mutex_m (0.2.0) psych (5.1.2) stringio rake (13.0.6) @@ -28,6 +29,7 @@ DEPENDENCIES fast-stemmer matrix minitest + mutex_m rdoc BUNDLED WITH diff --git a/classifier.gemspec b/classifier.gemspec index 52e58fd..5811c5d 100644 --- a/classifier.gemspec +++ b/classifier.gemspec @@ -9,7 +9,8 @@ Gem::Specification.new do |s| s.files = Dir['{lib}/**/*.rb', 'bin/*', 'LICENSE', '*.md', 'test/*'] s.license = 'LGPL' - s.add_dependency 'fast-stemmer', '~> 1.0.0' + s.add_dependency 'fast-stemmer', '~> 1.0' + s.add_dependency 'mutex_m', '~> 0.2' s.add_dependency 'rake' s.add_development_dependency 'minitest' s.add_development_dependency 'rdoc' diff --git a/test/test_helper.rb b/test/test_helper.rb index 4808dc5..b5e6f1d 100644 --- a/test/test_helper.rb +++ b/test/test_helper.rb @@ -1,6 +1,5 @@ $:.unshift(File.dirname(__FILE__) + '/../lib') -require 'mutex_m' require 'minitest' require 'minitest/autorun' require 'classifier' From 9b559f2d06045261a57b7758632fed55ce92c09b Mon Sep 17 00:00:00 2001 From: Lucas Carlson Date: Wed, 31 Jul 2024 07:56:34 -0700 Subject: [PATCH 8/8] Update classifier gem dependencies - Loosen fast-stemmer version constraint to ~> 1.0 - Add mutex_m dependency with version ~> 0.2 --- Gemfile.lock | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 85692e8..99dc08d 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,7 +2,8 @@ PATH remote: . specs: classifier (1.4.2) - fast-stemmer (~> 1.0.0) + fast-stemmer (~> 1.0) + mutex_m (~> 0.2) rake GEM