-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
simplify docker compose layers, update hyku and bulkrax
- Loading branch information
1 parent
144c62c
commit 0eeb409
Showing
3 changed files
with
213 additions
and
168 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,125 +2,124 @@ | |
|
||
# Ensure Knapsack version gets loaded after Hyku's bulkrax.rb | ||
Rails.application.config.after_initialize do | ||
if ENV.fetch('HYKU_BULKRAX_ENABLED', 'true') == 'true' | ||
Bulkrax.setup do |config| | ||
## | ||
# By default this is the first registered curation concern. But based on tests and past | ||
# configs, this should be 'GenericWork'. Note: The below value could change, but it should be | ||
# explicit. | ||
# | ||
# See https://github.com/samvera/hyku/blob/07fde572f9152d513b13f71cae90dd4fdfbfba6c/config/initializers/hyrax.rb#L200-L202 | ||
config.default_work_type = 'GenericWork' | ||
|
||
# Setting the available parsers for Adventist. | ||
config.parsers = [ | ||
{ name: "OAI - Adventist Digital Library", class_name: "Bulkrax::OaiAdventistQdcParser", partial: "oai_adventist_fields" }, | ||
{ name: "CSV - Comma Separated Values", class_name: "Bulkrax::CsvParser", partial: "csv_fields" }, | ||
] | ||
|
||
# Should Bulkrax make up source identifiers for you? This allows round tripping | ||
# and download errored entries to still work, but does mean if you upload the | ||
# same source record in two different files you WILL get duplicates. | ||
# It is given two arguments, self at the time of call and the index of the record | ||
# config.fill_in_blank_source_identifiers = ->(parser, index) { "b-#{parser.importer.id}-#{index}"} | ||
# or use a uuid | ||
# config.fill_in_blank_source_identifiers = ->(parser, index) { SecureRandom.uuid } | ||
|
||
# Field mappings | ||
# Create a completely new set of mappings by replacing the whole set as follows | ||
# config.field_mappings = { | ||
# "Bulkrax::OaiDcParser" => { **individual field mappings go here*** } | ||
# } | ||
|
||
# Add to, or change existing mappings as follows | ||
# e.g. to exclude date | ||
# config.field_mappings["Bulkrax::OaiDcParser"]["date"] = { from: ["date"], excluded: true } | ||
# | ||
# # e.g. to add the required source_identifier field | ||
# # config.field_mappings["Bulkrax::CsvParser"]["source_id"] = { from: ["old_source_id"], source_identifier: true } | ||
# If you want Bulkrax to fill in source_identifiers for you, see below | ||
|
||
# To duplicate a set of mappings from one parser to another | ||
# config.field_mappings["Bulkrax::OaiOmekaParser"] = {} | ||
# config.field_mappings["Bulkrax::OaiDcParser"].each {|key,value| config.field_mappings["Bulkrax::OaiOmekaParser"][key] = value } | ||
config.field_mappings['Bulkrax::OaiAdventistQdcParser'] = { | ||
'abstract' => { from: ['abstract'] }, | ||
'aark_id' => { from: ['aark_id'] }, | ||
'identifier' => { from: ['identifier'], source_identifier: true }, | ||
'bibliographic_citation' => { from: ['bibliographic_citation'] }, | ||
'creator' => { from: ['creator'] }, | ||
'contributor' => { from: ['contributor'] }, | ||
'edition' => { from: ['edition'] }, | ||
'resource_type' => { from: ['resource_type'] }, | ||
'issue_number' => { from: ['issue_number'] }, | ||
'language' => { from: ['language'] }, | ||
'description' => { from: ['description'] }, | ||
'pagination' => { from: ['pagination'] }, | ||
'extent' => { from: ['extent'], split: ';' }, | ||
'source' => { from: ['source'] }, | ||
'date_issued' => { from: ['date_issued'] }, | ||
'alt' => { from: ['geocode'] }, | ||
'publisher' => { from: ['publisher'], split: ';' }, | ||
'rights_statement' => { from: ['rights_statement'] }, | ||
'part_of' => { from: ['part_of'] }, | ||
'part' => { from: ['part_of'] }, | ||
'date_created' => { from: ['date_created'] }, | ||
'title' => { from: ['title'] }, | ||
'subject' => { from: ['subject'], split: ';' }, | ||
'volume_number' => { from: ['volume_number'] }, | ||
'keyword' => { from: ['keyword'], split: ';' }, | ||
'location' => { from: ['location'], split: ';' }, | ||
'model' => { from: ['model', 'work_type'] }, | ||
'remote_files' => { from: ['related_url'], split: ';', parsed: true }, | ||
'thumbnail_url' => { from: ['thumbnail_url'], default_thumbnail: true, parsed: true }, | ||
'video_embed' => { from: ['video_embed'] }, | ||
'refereed' => { from: ['peer_reviewed'] } | ||
} | ||
config.field_mappings['Bulkrax::CsvParser'] = { | ||
'abstract' => { from: ['description.abstract'] }, | ||
'aark_id' => { from: ['identifier.ark'] }, | ||
'identifier' => { from: ['identifier'], source_identifier: true }, | ||
'bibliographic_citation' => { from: ['identifier.bibliographicCitation'] }, | ||
'creator' => { from: ['creator'], split: ';' }, | ||
'contributor' => { from: ['contributor'], split: ';' }, | ||
'edition' => { from: ['title.release'] }, | ||
'resource_type' => { from: ['type'] }, | ||
'issue_number' => { from: ['relation.isPartOfIssue'] }, | ||
'language' => { from: ['language'], split: ';' }, | ||
'description' => { from: ['description'], split: ';' }, | ||
'pagination' => { from: ['format.extent'] }, | ||
'extent' => { from: ['format.extent'], split: ';' }, | ||
'source' => { from: ['source'], split: ';' }, | ||
'date_issued' => { from: ['date'] }, | ||
'alt' => { from: ['coverage.spatial'] }, | ||
'publisher' => { from: ['publisher'], split: ';' }, | ||
'rights_statement' => { from: ['rights'] }, | ||
'part_of' => { from: ['relation.isPartOf'], split: ';' }, | ||
'part' => { from: ['relation.isPartOf'] }, | ||
'date_created' => { from: ['date.other'] }, | ||
'title' => { from: ['title'] }, | ||
'subject' => { from: ['subject'], split: ';' }, | ||
'volume_number' => { from: ['relation.isPartOfVolume'] }, | ||
'keyword' => { from: ['keyword'], split: ';' }, | ||
'location' => { from: ['location'], split: ';' }, | ||
'model' => { from: ['work_type'] }, | ||
'remote_files' => { from: ['related_url'], split: ';', parsed: true }, | ||
'remote_url' => { from: ['official_url', 'remote_url'], split: ';' }, | ||
'thumbnail_url' => { from: ['thumbnail_url'], default_thumbnail: true, parsed: true }, | ||
'video_embed' => { from: ['video_embed'] }, | ||
'refereed' => { from: ['peer_reviewed'] } | ||
} | ||
Bulkrax.setup do |config| | ||
## | ||
# By default this is the first registered curation concern. But based on tests and past | ||
# configs, this should be 'GenericWork'. Note: The below value could change, but it should be | ||
# explicit. | ||
# | ||
# See https://github.com/samvera/hyku/blob/07fde572f9152d513b13f71cae90dd4fdfbfba6c/config/initializers/hyrax.rb#L200-L202 | ||
config.default_work_type = 'GenericWork' | ||
|
||
# Setting the available parsers for Adventist. | ||
config.parsers = [ | ||
{ name: "OAI - Adventist Digital Library", class_name: "Bulkrax::OaiAdventistQdcParser", partial: "oai_adventist_fields" }, | ||
{ name: "CSV - Comma Separated Values", class_name: "Bulkrax::CsvParser", partial: "csv_fields" }, | ||
] | ||
|
||
# Should Bulkrax make up source identifiers for you? This allows round tripping | ||
# and download errored entries to still work, but does mean if you upload the | ||
# same source record in two different files you WILL get duplicates. | ||
# It is given two arguments, self at the time of call and the index of the record | ||
# config.fill_in_blank_source_identifiers = ->(parser, index) { "b-#{parser.importer.id}-#{index}"} | ||
# or use a uuid | ||
# config.fill_in_blank_source_identifiers = ->(parser, index) { SecureRandom.uuid } | ||
|
||
config.field_mappings['Bulkrax::CsvParser'].merge!( | ||
'parents' => { from: ['parents'], split: /\s*[;|]\s*/, related_parents_field_mapping: true }, | ||
'children' => { from: ['children'], split: /\s*[;|]\s*/, related_children_field_mapping: true } | ||
) | ||
# Field mappings | ||
# Create a completely new set of mappings by replacing the whole set as follows | ||
# config.field_mappings = { | ||
# "Bulkrax::OaiDcParser" => { **individual field mappings go here*** } | ||
# } | ||
|
||
# Lambda to set the default field mapping | ||
config.default_field_mapping = lambda do |field| | ||
return if field.blank? | ||
{ | ||
field.to_s => | ||
# Add to, or change existing mappings as follows | ||
# e.g. to exclude date | ||
# config.field_mappings["Bulkrax::OaiDcParser"]["date"] = { from: ["date"], excluded: true } | ||
# | ||
# # e.g. to add the required source_identifier field | ||
# # config.field_mappings["Bulkrax::CsvParser"]["source_id"] = { from: ["old_source_id"], source_identifier: true } | ||
# If you want Bulkrax to fill in source_identifiers for you, see below | ||
|
||
# To duplicate a set of mappings from one parser to another | ||
# config.field_mappings["Bulkrax::OaiOmekaParser"] = {} | ||
# config.field_mappings["Bulkrax::OaiDcParser"].each {|key,value| config.field_mappings["Bulkrax::OaiOmekaParser"][key] = value } | ||
config.field_mappings['Bulkrax::OaiAdventistQdcParser'] = { | ||
'abstract' => { from: ['abstract'] }, | ||
'aark_id' => { from: ['aark_id'] }, | ||
'identifier' => { from: ['identifier'], source_identifier: true }, | ||
'bibliographic_citation' => { from: ['bibliographic_citation'] }, | ||
'creator' => { from: ['creator'] }, | ||
'contributor' => { from: ['contributor'] }, | ||
'edition' => { from: ['edition'] }, | ||
'resource_type' => { from: ['resource_type'] }, | ||
'issue_number' => { from: ['issue_number'] }, | ||
'language' => { from: ['language'] }, | ||
'description' => { from: ['description'] }, | ||
'pagination' => { from: ['pagination'] }, | ||
'extent' => { from: ['extent'], split: ';' }, | ||
'source' => { from: ['source'] }, | ||
'date_issued' => { from: ['date_issued'] }, | ||
'alt' => { from: ['geocode'] }, | ||
'publisher' => { from: ['publisher'], split: ';' }, | ||
'rights_statement' => { from: ['rights_statement'] }, | ||
'part_of' => { from: ['part_of'] }, | ||
'part' => { from: ['part_of'] }, | ||
'date_created' => { from: ['date_created'] }, | ||
'title' => { from: ['title'] }, | ||
'subject' => { from: ['subject'], split: ';' }, | ||
'volume_number' => { from: ['volume_number'] }, | ||
'keyword' => { from: ['keyword'], split: ';' }, | ||
'location' => { from: ['location'], split: ';' }, | ||
'model' => { from: ['model', 'work_type'] }, | ||
'remote_files' => { from: ['related_url'], split: ';', parsed: true }, | ||
'thumbnail_url' => { from: ['thumbnail_url'], default_thumbnail: true, parsed: true }, | ||
'video_embed' => { from: ['video_embed'] }, | ||
'refereed' => { from: ['peer_reviewed'] } | ||
} | ||
config.field_mappings['Bulkrax::CsvParser'] = { | ||
'abstract' => { from: ['description.abstract'] }, | ||
'aark_id' => { from: ['identifier.ark'] }, | ||
'identifier' => { from: ['identifier'], source_identifier: true }, | ||
'bibliographic_citation' => { from: ['identifier.bibliographicCitation'] }, | ||
'creator' => { from: ['creator'], split: ';' }, | ||
'contributor' => { from: ['contributor'], split: ';' }, | ||
'edition' => { from: ['title.release'] }, | ||
'resource_type' => { from: ['type'] }, | ||
'issue_number' => { from: ['relation.isPartOfIssue'] }, | ||
'language' => { from: ['language'], split: ';' }, | ||
'description' => { from: ['description'], split: ';' }, | ||
'pagination' => { from: ['format.extent'] }, | ||
'extent' => { from: ['format.extent'], split: ';' }, | ||
'source' => { from: ['source'], split: ';' }, | ||
'date_issued' => { from: ['date'] }, | ||
'alt' => { from: ['coverage.spatial'] }, | ||
'publisher' => { from: ['publisher'], split: ';' }, | ||
'rights_statement' => { from: ['rights'] }, | ||
'part_of' => { from: ['relation.isPartOf'], split: ';' }, | ||
'part' => { from: ['relation.isPartOf'] }, | ||
'date_created' => { from: ['date.other'] }, | ||
'title' => { from: ['title'] }, | ||
'subject' => { from: ['subject'], split: ';' }, | ||
'volume_number' => { from: ['relation.isPartOfVolume'] }, | ||
'keyword' => { from: ['keyword'], split: ';' }, | ||
'location' => { from: ['location'], split: ';' }, | ||
'model' => { from: ['work_type'] }, | ||
'remote_files' => { from: ['related_url'], split: ';', parsed: true }, | ||
'remote_url' => { from: ['official_url', 'remote_url'], split: ';' }, | ||
'thumbnail_url' => { from: ['thumbnail_url'], default_thumbnail: true, parsed: true }, | ||
'video_embed' => { from: ['video_embed'] }, | ||
'refereed' => { from: ['peer_reviewed'] } | ||
} | ||
|
||
config.field_mappings['Bulkrax::CsvParser'].merge!( | ||
'parents' => { from: ['parents'], split: /\s*[;|]\s*/, related_parents_field_mapping: true }, | ||
'children' => { from: ['children'], split: /\s*[;|]\s*/, related_children_field_mapping: true } | ||
) | ||
|
||
# Lambda to set the default field mapping | ||
config.default_field_mapping = lambda do |field| | ||
return if field.blank? | ||
{ | ||
field.to_s => | ||
{ | ||
from: [field.to_s], | ||
split: false, | ||
|
@@ -129,43 +128,43 @@ | |
excluded: false, | ||
default_thumbnail: false | ||
} | ||
} | ||
end | ||
|
||
# WorkType to use as the default if none is specified in the import | ||
# Default is the first returned by Hyrax.config.curation_concerns | ||
# config.default_work_type = MyWork | ||
|
||
# Path to store pending imports | ||
# config.import_path = 'tmp/imports' | ||
|
||
# Path to store exports before download | ||
# config.export_path = 'tmp/exports' | ||
|
||
# Server name for oai request header | ||
# config.server_name = 'my_server@name.com' | ||
|
||
# Field_mapping for establishing a parent-child relationship (FROM parent TO child) | ||
# This can be a Collection to Work, or Work to Work relationship | ||
# This value IS NOT used for OAI, so setting the OAI Entries here will have no effect | ||
# The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry' | ||
# Example: | ||
# { | ||
# 'Bulkrax::RdfEntry' => 'http://opaquenamespace.org/ns/contents', | ||
# 'Bulkrax::CsvEntry' => 'children' | ||
# } | ||
# By default no parent-child relationships are added | ||
# config.parent_child_field_mapping = { } | ||
|
||
# Field_mapping for establishing a collection relationship (FROM work TO collection) | ||
# This value IS NOT used for OAI, so setting the OAI parser here will have no effect | ||
# The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry' | ||
# The default value for CSV is collection | ||
# Add/replace parsers, for example: | ||
# config.collection_field_mapping['Bulkrax::RdfEntry'] = 'http://opaquenamespace.org/ns/set' | ||
|
||
# Properties that should not be used in imports/exports. They are reserved for use by Hyrax. | ||
# config.reserved_properties += ['my_field'] | ||
} | ||
end | ||
|
||
# WorkType to use as the default if none is specified in the import | ||
# Default is the first returned by Hyrax.config.curation_concerns | ||
# config.default_work_type = MyWork | ||
|
||
# Path to store pending imports | ||
# config.import_path = 'tmp/imports' | ||
|
||
# Path to store exports before download | ||
# config.export_path = 'tmp/exports' | ||
|
||
# Server name for oai request header | ||
# config.server_name = 'my_server@name.com' | ||
|
||
# Field_mapping for establishing a parent-child relationship (FROM parent TO child) | ||
# This can be a Collection to Work, or Work to Work relationship | ||
# This value IS NOT used for OAI, so setting the OAI Entries here will have no effect | ||
# The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry' | ||
# Example: | ||
# { | ||
# 'Bulkrax::RdfEntry' => 'http://opaquenamespace.org/ns/contents', | ||
# 'Bulkrax::CsvEntry' => 'children' | ||
# } | ||
# By default no parent-child relationships are added | ||
# config.parent_child_field_mapping = { } | ||
|
||
# Field_mapping for establishing a collection relationship (FROM work TO collection) | ||
# This value IS NOT used for OAI, so setting the OAI parser here will have no effect | ||
# The mapping is supplied per Entry, provide the full class name as a string, eg. 'Bulkrax::CsvEntry' | ||
# The default value for CSV is collection | ||
# Add/replace parsers, for example: | ||
# config.collection_field_mapping['Bulkrax::RdfEntry'] = 'http://opaquenamespace.org/ns/set' | ||
|
||
# Properties that should not be used in imports/exports. They are reserved for use by Hyrax. | ||
# config.reserved_properties += ['my_field'] | ||
|
||
end | ||
end |
Oops, something went wrong.