Skip to content

Commit

Permalink
Deleted the old DataCite GraphQL code and replaced it with the REST API
Browse files Browse the repository at this point in the history
  • Loading branch information
briri committed May 7, 2024
1 parent c3bf526 commit 01378b1
Show file tree
Hide file tree
Showing 11 changed files with 128 additions and 328 deletions.
2 changes: 1 addition & 1 deletion src/lambdas/harvesters/datacite/.aws-sam/build.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file is auto generated by SAM CLI build command

[function_build_definitions.66e880ba-8a1c-4a71-9af7-3212a8fef2f9]
[function_build_definitions.d040dc14-b551-4b81-b69b-29ae4079ae63]
codeuri = "/Users/briley/Documents/workspace/dmsp_aws_prototype/src/lambdas/harvesters/datacite"
runtime = "ruby3.2"
architecture = "x86_64"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Resources:
DYNAMO_INDEX_TABLE: !Ref DynamoIndexTableName
SNS_FATAL_ERROR_TOPIC: !Ref SnsTopicEmailArn
Events:
DataCiteHarvesting:
DataCiteHarvester:
Type: 'EventBridgeRule'
Properties:
DeadLetterConfig:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Resources:
SNS_FATAL_ERROR_TOPIC:
Ref: SnsTopicEmailArn
Events:
DataCiteHarvesting:
DataCiteHarvester:
Type: EventBridgeRule
Properties:
DeadLetterConfig:
Expand Down
348 changes: 74 additions & 274 deletions src/lambdas/harvesters/datacite/app.rb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/lambdas/harvesters/datacite/template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ Resources:
DYNAMO_INDEX_TABLE: !Ref DynamoIndexTableName
SNS_FATAL_ERROR_TOPIC: !Ref SnsTopicEmailArn
Events:
DataCiteHarvesting:
DataCiteHarvester:
Type: 'EventBridgeRule'
Properties:
DeadLetterConfig:
Expand Down
86 changes: 43 additions & 43 deletions src/lambdas/harvesters/harvestable_dmps/app.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,39 @@ module Functions
class HarvestableDmps
SOURCE = 'Harvestable DMPs'

# DMP IDs, grouped by institution, that the Dynamo scan uses as its `:dmp_pks`
# value (filter expression `contains(:dmp_pks, dmp_id)`), so only these DMPs
# are selected for harvesting.
#
# Frozen so the list cannot be mutated at runtime by any code that receives it.
#
# NOTE(review): two entries below contain lowercase hex characters
# ('D14406894e', 'D14F38aa13'). They are kept exactly as written; confirm they
# match the stored dmp_id values, since the comparison is presumably
# case-sensitive.
PILOT_DMPS = [
# Northwestern University
'doi.org/10.48321/D10B3E54E4',
'doi.org/10.48321/D1143FD15F',
'doi.org/10.48321/D139D84658',
'doi.org/10.48321/D1944C8215',
'doi.org/10.48321/D1A04A9B1D',

# University of California, Berkeley
'doi.org/10.48321/D114471AC3',
'doi.org/10.48321/D18F9B93B8',
'doi.org/10.48321/D1BA48FBC9',
'doi.org/10.48321/D1CE350633',
'doi.org/10.48321/D1DF9DDDAF',

# University of California, Riverside
'doi.org/10.48321/D13BEA529C',
'doi.org/10.48321/D14406894e',
'doi.org/10.48321/D145457051',
'doi.org/10.48321/D1FCB77AF0',
'doi.org/10.48321/D1FFBFF8FE',

# University of California, Santa Barbara
'doi.org/10.48321/D154FA23E9',
'doi.org/10.48321/D1A90CCC2B',
'doi.org/10.48321/D1BAD5B94D',
'doi.org/10.48321/D1FFE5D7FD',

# University of Colorado Boulder
'doi.org/10.48321/D14F38aa13',
'doi.org/10.48321/D1B581751F'
].freeze

class << self
def process(event:, context:)
# No need to validate the source and detail-type because that is done by the EventRule
Expand All @@ -41,16 +74,18 @@ def process(event:, context:)
docs = _fetch_relevant_dmps_from_dynamo(client: dynamo_client, table:, logger:)
logger.debug(message: 'Relevant DMP search results: ', details: docs) if logger.respond_to?(:debug)

rors = docs.map { |doc| doc.fetch('affiliation_ids', []) }.flatten.compact.uniq
# rors = docs.map { |doc| doc.fetch('affiliation_ids', []) }.flatten.compact.uniq
affils = docs.map { |doc| doc.fetch('affiliations', []) }.flatten.compact.uniq

# Kick off harvesters for each unique ROR id
publisher = Uc3DmpEventBridge::Publisher.new
rors.each do |ror|
dmps = docs.select { |doc| doc.fetch('affiliation_ids', []).include?(ror) }
affils.each do |affil|
# dmps = docs.select { |doc| doc.fetch('affiliation_ids', []).include?(ror) }
dmps = docs.select { |doc| doc.fetch('affiliations', []).include?(affil) }

# limit the number of DMPs we send at one time because SNS has a size limit
dmps.each_slice(50) do |dmp_entries|
_kick_off_harvester(ror:, dmps: dmp_entries, publisher:, logger:)
_kick_off_harvester(affil:, dmps: dmp_entries, publisher:, logger:)
end

# Pause for a second. Publishing these messages kicks off multiple Lambda harvesters and we
Expand Down Expand Up @@ -91,10 +126,10 @@ def _open_search_connect(logger:)
puts e.backtrace
end

def _kick_off_harvester(ror:, dmps:, publisher: nil, logger: nil)
def _kick_off_harvester(affil:, dmps:, publisher: nil, logger: nil)
# Publish the change to the EventBridge
publisher = Uc3DmpEventBridge::Publisher.new if publisher.nil?
publisher.publish(source: 'HarvestableDmps', event_type: 'Harvest', dmp: {}, detail: { ror:, dmps: }, logger:)
publisher.publish(source: 'HarvestableDmps', event_type: 'Harvest', dmp: {}, detail: { ror: affil, dmps: }, logger:)
end

# Instead of OpenSearch (for now), grab the relevant DMPs from our Dynamo Index table
Expand All @@ -116,7 +151,7 @@ def _dynamo_scan(client:, table:, items: [], last_key: '', logger: nil)
args = {
table_name: table,
consistent_read: false,
projection_expression: 'PK, affiliation_ids',
projection_expression: 'PK, affiliations',
# expression_attribute_values: {
# ':sk': 'METADATA',
# ':not_empty': '',
Expand All @@ -127,42 +162,7 @@ def _dynamo_scan(client:, table:, items: [], last_key: '', logger: nil)
# filter_expression: expr.join(' AND ')

expression_attribute_values: {
':dmp_pks': [
=begin
# Northwestern University
'doi.org/10.48321/D10B3E54E4',
'doi.org/10.48321/D1944C8215',
'doi.org/10.48321/D139D84658',
'doi.org/10.48321/D1A04A9B1D',
# University of Colorado Boulder
'doi.org/10.48321/D14F38aa13',
'doi.org/10.48321/D1B581751F',
# University of California, Santa Barbara
'doi.org/10.48321/D1BAD5B94D',
'doi.org/10.48321/D1FFE5D7FD',
'doi.org/10.48321/D1A90CCC2B',
'doi.org/10.48321/D154FA23E9',
=end
# University of California, Berkeley
'doi.org/10.48321/D114471AC3',
'doi.org/10.48321/D1DF9DDDAF',
'doi.org/10.48321/D18F9B93B8',
'doi.org/10.48321/D1BA48FBC9',
'doi.org/10.48321/D1CE350633',
=begin
# University of California, Riverside
'doi.org/10.48321/D14406894e',
'doi.org/10.48321/D145457051',
'doi.org/10.48321/D1FFBFF8FE',
'doi.org/10.48321/D1FCB77AF0',
'doi.org/10.48321/D13BEA529C',
# Boston University
'doi.org/10.48321/D1A04A9B1D'
=end
]
':dmp_pks': PILOT_DMPS
},
filter_expression: 'contains(:dmp_pks, dmp_id)'
}
Expand Down
10 changes: 5 additions & 5 deletions src/lambdas/layers/baseline/Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ GEM
public_suffix (>= 2.0.2, < 6.0)
ast (2.4.2)
aws-eventstream (1.3.0)
aws-partitions (1.923.0)
aws-partitions (1.925.0)
aws-sdk-cognitoidentityprovider (1.90.0)
aws-sdk-core (~> 3, >= 3.193.0)
aws-sigv4 (~> 1.1)
aws-sdk-core (3.194.0)
aws-sdk-core (3.194.1)
aws-eventstream (~> 1, >= 1.3.0)
aws-partitions (~> 1, >= 1.651.0)
aws-sigv4 (~> 1.8)
jmespath (~> 1, >= 1.6.1)
aws-sdk-dynamodb (1.107.0)
aws-sdk-dynamodb (1.108.0)
aws-sdk-core (~> 3, >= 3.193.0)
aws-sigv4 (~> 1.1)
aws-sdk-eventbridge (1.58.0)
Expand Down Expand Up @@ -120,7 +120,7 @@ GEM
rubocop (~> 1.41)
rubocop-factory_bot (2.25.1)
rubocop (~> 1.41)
rubocop-rspec (2.29.1)
rubocop-rspec (2.29.2)
rubocop (~> 1.40)
rubocop-capybara (~> 2.17)
rubocop-factory_bot (~> 2.22)
Expand Down Expand Up @@ -153,7 +153,7 @@ GEM
aws-sdk-ssm (~> 1.150)
httparty (~> 0.21.0)
json (~> 2.6)
uc3-dmp-id (0.1.37)
uc3-dmp-id (0.1.39)
json (~> 2.6)
json-schema (~> 3.0)
text (~> 1.3)
Expand Down
2 changes: 1 addition & 1 deletion src/sam/gems/uc3-dmp-id/lib/uc3-dmp-id/comparator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def compare(hash:)
response = _text_match?(type: 'title', text: hash['title'], dmp:, response:) if response[:score].positive?
response = _text_match?(type: 'abstract', text: hash['description'], dmp:, response:) if response[:score].positive?
# If the score is less than 3 then we have no confidence that it is a match
next if response[:score] <= 2
# next if response[:score] <= 2

# Set the confidence level based on the score
response[:dmp_id] = "DMP##{dmp['dmp_id']}"
Expand Down
2 changes: 1 addition & 1 deletion src/sam/gems/uc3-dmp-id/lib/uc3-dmp-id/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# frozen_string_literal: true

module Uc3DmpId
VERSION = '0.1.38'
VERSION = '0.1.39'
end
Binary file removed src/sam/gems/uc3-dmp-id/uc3-dmp-id-0.1.38.gem
Binary file not shown.
Binary file added src/sam/gems/uc3-dmp-id/uc3-dmp-id-0.1.39.gem
Binary file not shown.

0 comments on commit 01378b1

Please sign in to comment.