-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #58 from MITLibraries/tco38-journals-model-and-loader
Adds Detector::Journal class
- Loading branch information
Showing
6 changed files
with
159 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# frozen_string_literal: true | ||
|
||
# Namespace for detectors: classes that implement the various algorithms we use to identify
# patterns within search terms.
module Detector
  class << self
    # Prefix applied to the table names of models in this namespace (e.g. `detector_journals`).
    #
    # @return [String]
    def table_name_prefix
      'detector_'
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
# frozen_string_literal: true | ||
|
||
# == Schema Information | ||
# | ||
# Table name: detector_journals | ||
# | ||
# id :integer not null, primary key | ||
# name :string | ||
# additional_info :json | ||
# created_at :datetime not null | ||
# updated_at :datetime not null | ||
# | ||
module Detector
  # Detector::Journal stores information about academic journals loaded from external sources so that we can check
  # our incoming Terms against this information.
  #
  # Names are normalized to lowercase before every save, so both matchers downcase the incoming phrase before
  # comparing it to stored names.
  class Journal < ApplicationRecord
    before_save :downcase_fields!

    # Identify journals in which the incoming phrase matches a Journal.name exactly
    #
    # @note We always store the Journal.name downcased, so we should also always downcase the phrase
    #   when matching
    #
    # @note In reality, multiple Journals can exist with the same name. Therefore, we don't enforce
    #   unique names and don't expect a single Journal to be returned.
    #
    # @param phrase [String] A string representation of a search term (not an actual Term object!)
    #
    # @return [ActiveRecord::Relation<Detector::Journal>] All journals whose name equals the downcased phrase.
    def self.full_term_match(phrase)
      where(name: phrase.downcase)
    end

    # Identify journals in which the incoming phrase contains one or more Journal names
    #
    # @note This likely won't scale well and may not be suitable for live detection as it loads all Journal records.
    #
    # @note Journals with a missing or blank name are skipped: a nil name cannot be searched for, and an empty name
    #   would otherwise be "contained" in every phrase.
    #
    # @param phrase [String] A string representation of a search term (not an actual Term object!)
    #
    # @return [Array<Detector::Journal>] The loaded journals whose name appears within the downcased phrase.
    def self.partial_term_match(phrase)
      # Downcase once up front rather than once per Journal record.
      normalized_phrase = phrase.downcase

      all.select { |journal| journal.name.present? && normalized_phrase.include?(journal.name) }
    end

    private

    # Downcasing all names before saving allows for more efficient matching by ensuring our index is lowercase.
    # If we find we need the non-lowercase Journal name in the future, we could store that as `additional_info` json
    #
    # The name column is nullable, so a missing name is left as nil instead of raising. The value is reassigned
    # rather than mutated in place so that a frozen string can never cause a FrozenError here.
    def downcase_fields!
      self.name = name&.downcase
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Creates the `detector_journals` table: a journal name, free-form JSON metadata, and timestamps,
# with an index on `name` to support lookups by name.
class CreateDetectorJournals < ActiveRecord::Migration[7.1]
  def change
    create_table :detector_journals do |table|
      table.string :name
      table.json :additional_info
      table.timestamps

      # Declared within the table definition; equivalent to a separate add_index on :name.
      table.index :name
    end
  end
end
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# == Schema Information | ||
# | ||
# Table name: detector_journals | ||
# | ||
# id :integer not null, primary key | ||
# name :string | ||
# additional_info :json | ||
# created_at :datetime not null | ||
# updated_at :datetime not null | ||
# | ||
|
||
# Note: fixtures bypass ActiveRecord callbacks, so although the model downcases `name` before saving,
# the values below are stored exactly as written here.
# Always write the 'name' of every fixture in lowercase so that fixtures match the real behavior
# of the application.
nature:
  name: nature
  additional_info:
    issns:
      - '0028-0836'
      - '1476-4687'

the_new_england_journal_of_medicine:
  name: the new england journal of medicine
  additional_info:
    issns:
      - '0028-4793'
      - '1533-4406'

nature_medicine:
  name: nature medicine
  additional_info:
    issns:
      - '1078-8956'
      - '1546-170X'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
# frozen_string_literal: true | ||
|
||
# == Schema Information | ||
# | ||
# Table name: detector_journals | ||
# | ||
# id :integer not null, primary key | ||
# name :string | ||
# additional_info :json | ||
# created_at :datetime not null | ||
# updated_at :datetime not null | ||
# | ||
require 'test_helper' | ||
|
||
module Detector
  # Exercises the exact and partial name matchers on Detector::Journal against the detector_journals fixtures.
  # Counts are checked with assert_equal / assert_empty so that a failure reports the actual value.
  class JournalTest < ActiveSupport::TestCase
    test 'exact term match on journal name' do
      expected = detector_journals('the_new_england_journal_of_medicine')
      actual = Detector::Journal.full_term_match('the new england journal of medicine')

      assert_equal 1, actual.count
      assert_equal(expected, actual.first)
    end

    test 'mixed case exact term match on journal name' do
      expected = detector_journals('the_new_england_journal_of_medicine')
      actual = Detector::Journal.full_term_match('The New England Journal of Medicine')

      assert_equal 1, actual.count
      assert_equal(expected, actual.first)
    end

    test 'exact match within longer term returns no matches' do
      actual = Detector::Journal.full_term_match('The New England Journal of Medicine, 1999')

      assert_empty actual
    end

    test 'phrase match within longer term returns matches' do
      expected = detector_journals('the_new_england_journal_of_medicine')
      actual = Detector::Journal.partial_term_match('words and stuff The New England Journal of Medicine, 1999')

      assert_equal 1, actual.count
      assert_includes actual, expected
    end

    test 'multiple matches can happen with phrase matching within longer terms' do
      actual = Detector::Journal.partial_term_match('words and stuff Nature medicine, 1999')

      # Both "nature" and "nature medicine" are contained in the phrase.
      assert_equal 2, actual.count
      assert_includes actual, detector_journals('nature')
      assert_includes actual, detector_journals('nature_medicine')
    end
  end
end