Skip to content

Commit

Permalink
Merge pull request #58 from MITLibraries/tco38-journals-model-and-loader
Browse files Browse the repository at this point in the history
Adds Detector::Journal class
  • Loading branch information
JPrevost authored Jul 15, 2024
2 parents e4e9a51 + 59e8f7a commit e394f7e
Show file tree
Hide file tree
Showing 6 changed files with 159 additions and 1 deletion.
9 changes: 9 additions & 0 deletions app/models/detector.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# frozen_string_literal: true

# Namespace for detector classes: each one implements an algorithm that identifies a
# particular pattern within search terms.
module Detector
  class << self
    # Table-name prefix shared by the models namespaced under Detector
    # (e.g. Detector::Journal is stored in the `detector_journals` table).
    #
    # @return [String]
    def table_name_prefix
      'detector_'
    end
  end
end
53 changes: 53 additions & 0 deletions app/models/detector/journal.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# frozen_string_literal: true

# == Schema Information
#
# Table name: detector_journals
#
# id :integer not null, primary key
# name :string
# additional_info :json
# created_at :datetime not null
# updated_at :datetime not null
#
module Detector
  # Detector::Journal stores information about academic journals loaded from external sources, allowing us to check
  # our incoming Terms against this information.
  class Journal < ApplicationRecord
    before_save :downcase_fields!

    # Identify journals in which the incoming phrase matches a Journal.name exactly
    #
    # @note We always store the Journal.name downcased, so we should also always downcase the phrase
    #   when matching
    #
    # @note In reality, multiple Journals can exist with the same name. Therefore, we don't enforce
    #   unique names and don't expect a single Journal to be returned.
    #
    # @param phrase [String] A string representation of a search term (not an actual Term object!)
    #
    # @return [ActiveRecord::Relation<Detector::Journal>] All journals whose name equals the downcased phrase.
    def self.full_term_match(phrase)
      Journal.where(name: phrase.downcase)
    end

    # Identify journals in which the incoming phrase contains one or more Journal names
    #
    # @note This likely won't scale well and may not be suitable for live detection as it walks every Journal
    #   record. Records are read in batches so the whole table is never instantiated at once.
    #
    # @note Journals with a missing or blank name are skipped: a nil name cannot be searched for, and an empty
    #   name is a substring of every phrase, so it would otherwise match everything.
    #
    # @param phrase [String] A string representation of a search term (not an actual Term object!)
    #
    # @return [Array<Detector::Journal>] The journals whose name appears within the phrase (not a relation).
    def self.partial_term_match(phrase)
      # Downcase once up front instead of once per Journal record.
      normalized_phrase = phrase.downcase

      Journal.find_each.select do |journal|
        journal.name.present? && normalized_phrase.include?(journal.name)
      end
    end

    private

    # Downcasing all names before saving allows for more efficient matching by ensuring our index is lowercase.
    # If we find we need the non-lowercase Journal name in the future, we could store that as `additional_info` json
    #
    # The name column is nullable, so a nil name is left untouched rather than raising.
    def downcase_fields!
      self.name = name&.downcase
    end
  end
end
11 changes: 11 additions & 0 deletions db/migrate/20240701205444_create_detector_journals.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Creates the detector_journals table: a journal name plus a free-form JSON
# blob of additional information, with an index on the name column.
class CreateDetectorJournals < ActiveRecord::Migration[7.1]
  def change
    create_table(:detector_journals) do |table|
      table.string :name
      table.json :additional_info
      table.timestamps

      # Index on name, declared alongside the columns it covers.
      table.index :name
    end
  end
end
10 changes: 9 additions & 1 deletion db/schema.rb

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 29 additions & 0 deletions test/fixtures/detector/journals.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# == Schema Information
#
# Table name: detector_journals
#
# id :integer not null, primary key
# name :string
# additional_info :json
# created_at :datetime not null
# updated_at :datetime not null
#

# Note: fixtures bypass ActiveRecord callbacks, so although the model automatically downcases `name`
# before saving, these fixtures are stored exactly as written here (mixed case included).
# Put another way, please make sure to always use lowercase for the 'name' in these fixtures
# so that they match the real behavior of the application.
# Single-word name. It is also a substring of "nature medicine" (see nature_medicine below),
# so a partial match on a phrase containing "nature medicine" returns both fixtures.
nature: {
name: nature,
additional_info: {issns: ['0028-0836', '1476-4687']}
}

# Multi-word name used by the exact-match and partial-match tests.
the_new_england_journal_of_medicine: {
name: the new england journal of medicine,
additional_info: {issns: ['0028-4793', '1533-4406']}
}

# Name that contains another fixture's name ("nature"), used to exercise multiple partial matches.
nature_medicine: {
name: nature medicine,
additional_info: {issns: ['1078-8956', '1546-170X']}
}
48 changes: 48 additions & 0 deletions test/models/detector/journal_test.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# frozen_string_literal: true

# == Schema Information
#
# Table name: detector_journals
#
# id :integer not null, primary key
# name :string
# additional_info :json
# created_at :datetime not null
# updated_at :datetime not null
#
require 'test_helper'

module Detector
  # Exercises Detector::Journal's exact (full_term_match) and substring (partial_term_match) lookups
  # against the lowercase journal fixtures.
  class JournalTest < ActiveSupport::TestCase
    test 'exact term match on journal name' do
      expected = detector_journals('the_new_england_journal_of_medicine')
      actual = Detector::Journal.full_term_match('the new england journal of medicine')

      # assert_equal reports expected vs. actual on failure, unlike `assert a == b`.
      assert_equal 1, actual.count
      assert_equal expected, actual.first
    end

    test 'mixed case exact term match on journal name' do
      expected = detector_journals('the_new_england_journal_of_medicine')
      actual = Detector::Journal.full_term_match('The New England Journal of Medicine')

      assert_equal 1, actual.count
      assert_equal expected, actual.first
    end

    test 'exact match within longer term returns no matches' do
      actual = Detector::Journal.full_term_match('The New England Journal of Medicine, 1999')

      assert_empty actual
    end

    test 'phrase match within longer term returns matches' do
      expected = detector_journals('the_new_england_journal_of_medicine')
      actual = Detector::Journal.partial_term_match('words and stuff The New England Journal of Medicine, 1999')

      assert_equal 1, actual.count
      # Check which journal matched, not just how many.
      assert_includes actual, expected
    end

    test 'multple matches can happen with phrase matching within longer terms' do
      actual = Detector::Journal.partial_term_match('words and stuff Nature medicine, 1999')

      assert_equal 2, actual.count
      # "nature medicine" contains both the "nature" and "nature medicine" journal names.
      assert_includes actual, detector_journals('nature')
      assert_includes actual, detector_journals('nature_medicine')
    end
  end
end

0 comments on commit e394f7e

Please sign in to comment.