-
Notifications
You must be signed in to change notification settings - Fork 0
Seed data migration
config/initializers/hyrax_migrator.rb
config.skip_field_mode = true
config.migration_user = '[email protected]'
config.upload_storage_service = :file_system
config.ingest_storage_service = :file_system
Default admin set and collection
bundle exec rails hyrax:default_admin_set:create
bundle exec rails hyrax:default_collection_types:create
Default OD2 admin sets and collections
bundle exec rake oregon_digital:create_admin_sets
bundle exec rake oregon_digital:create_collections
Load workflows
bundle exec rails hyrax:workflow:load
-
scp bagsclean.tar.gz from OD1 server to a local folder
/data/tmp/
and untar it, which creates bagsclean with a subfolder for each seed data collection. -
Make a zip version of a single bag
/data/tmp/bagsclean/Baseball_jpegs/3t945r08v/
from within the folder, and move it to /data/tmp/batch_baseball/3t945r08v.zip
. -
Load the console in the server container and run the following to migrate bag located at
/data/tmp/batch_baseball/3t945r08v.zip
:
$ bundle exec rails c
> Hyrax::Migrator.config.skip_field_mode = true
> Hyrax::Migrator.config.upload_storage_service = :file_system
> Hyrax::Migrator.config.ingest_storage_service = :file_system
> pid = "3t945r08v"
> file_path = "/data/tmp/batch_baseball/3t945r08v.zip"
> w = Hyrax::Migrator::Work.find_by_pid(pid)
> w = Hyrax::Migrator::Work.create(pid: pid, file_path: file_path) if w.blank?
> m = Hyrax::Migrator::Middleware.default
> m.start(w)
To allow a restart, existing records would need to be removed for now (restart for existing records hasn't been implemented yet):
$ bundle exec rails c
> pid = "3t945r08v"
> gid = ActiveFedora::Base.find(pid).to_global_id.to_s if ActiveFedora::Base.exists?(pid)
> Sipity::Entity.find_by(proxy_for_global_id: gid).delete if Sipity::Entity.find_by(proxy_for_global_id: gid).present?
> ActiveFedora::Base.find(pid).delete if ActiveFedora::Base.exists?(pid)
> ActiveFedora::Base.eradicate(pid)
> Hyrax::Migrator::Work.find_by_pid(pid).delete if Hyrax::Migrator::Work.find_by_pid(pid).present?
-
scp
bagsclean.tar.gz
from the OD1 server to a local folder like /data/tmp/
and untar it, which creates bagsclean
with a subfolder for each seed data collection. -
cd into
Baseball_jpegs
folder /data/tmp/bagsclean/Baseball_jpegs/
and execute zip_bags.sh
.
/data/tmp/bagsclean/Baseball_jpegs/zip_bags.sh
#!/bin/bash
# Usage: cd into the collection folder 'Baseball_jpegs' and run bash zip_bags.sh
#
# Creates <PID>.zip next to each <PID>/ bag directory listed below. Every bag is
# zipped from inside its own directory, so the archive holds the bag contents at
# its root rather than under a <PID>/ prefix.
# Exits non-zero if any bag could not be zipped.

# Baseball jpegs (20) collection
ARRAY=('3t945r08v'
       '4t64gn50h'
       '5138jf19t'
       '5t34sj883'
       '6t053g24j'
       '8c97kq715'
       'cc08hf952'
       'fb494874x'
       'js956g08w'
       'k0698775b'
       'ms35t889d'
       'pg15bf278'
       'rn3011720'
       'sq87bt983'
       'sx61dm57b'
       'vq27zn81f'
       'wh246s39p'
       'wh246s438'
       'xp68kg588'
       'zs25x8763')

# number of bags that could not be zipped
failed=0

# zip from within the directory of each PID
for pid in "${ARRAY[@]}"; do
  echo "${pid}"

  # A missing bag must not be fatal for the rest of the batch, and must never
  # fall through to zip: zipping '.' from the wrong directory would archive the
  # whole collection folder.
  if [ ! -d "${pid}" ]; then
    echo "Skipping ${pid}: bag directory not found in $(pwd)" >&2
    failed=$((failed + 1))
    continue
  fi

  # The subshell confines the directory change, so the loop always continues
  # from the collection folder even when cd or zip fails.
  if ! (cd "${pid}" && zip -r "../${pid}.zip" .); then
    echo "Failed to zip ${pid}" >&2
    failed=$((failed + 1))
  fi
done

if [ "${failed}" -gt 0 ]; then
  echo "${failed} bag(s) could not be zipped" >&2
  exit 1
fi
- Make batch folder
batch_baseball
and get the zip files generated with zip_bags.sh
mkdir /data/tmp/batch_baseball
rsync -r -v /data/tmp/bagsclean/Baseball_jpegs/*.zip /data/tmp/batch_baseball/
- Get into the workers container and run
BagIngestService
for batch batch_baseball
$ bundle exec rails c
> c = Hyrax::Migrator::Configuration.new
> c.upload_storage_service = :file_system
> c.ingest_storage_service = :file_system
> c.file_system_path = "/data/tmp"
> c.ingest_local_path = "/data/tmp"
> i = Hyrax::Migrator::Services::BagIngestService.new(['batch_baseball'], c)
> i.ingest
- Inspect the logs in the workers container
tail -f log/development.log
Add to OD2 lib/tasks/migration/bulk_delete.rake
and set batch_name
to a folder name inside the path defined at config.ingest_local_path
(defaults to /data/tmp
in config/initializers/hyrax_migrator.rb
) in OD2.
Example
When batch_name
is batch_baseball
, the task below will scan bags at /data/tmp/batch_baseball
to get the PIDs, and then remove them from the system.
/data/lib/tasks/migration/bulk_delete.rake
# frozen_string_literal: true

namespace :migration do
  # Removes every work that belongs to one migration batch so the batch can be
  # ingested again. The PIDs are taken from the names of the *.zip bags found in
  # <ingest_local_path>/<batch_name>; for each PID the workflow entity, the
  # repository object (and its tombstone) and the migrator tracking record are
  # deleted. Progress is printed to stdout and written to
  # log/bulk-delete-migrated-<date>.log.
  #
  # A failure on one PID is reported and logged, then the task moves on to the
  # next PID instead of abandoning the rest of the batch.
  desc 'Bulk delete migrated items'
  task bulk_delete: :environment do
    # Folder name inside the path returned by Configuration#ingest_local_path.
    batch_name = 'batch_baseball'

    file_name = "bulk-delete-migrated-#{Date.today}.log"
    logger = Logger.new(File.join(Rails.root, 'log', file_name))
    puts "Delete works already migrated at #{batch_name}"

    c = Hyrax::Migrator::Configuration.new
    batch_path = File.join(c.ingest_local_path, batch_name)

    begin
      bag_zip_files = Dir.entries(batch_path).select do |entry|
        File.file?(File.join(batch_path, entry)) && File.extname(entry) == '.zip'
      end
    rescue StandardError => e
      puts "Unable to read batch folder #{batch_path}"
      puts "Error: #{e.message}"
      logger.error "Unable to read batch folder #{batch_path}: #{e.message}"
      # `next` leaves the task block; `return` is not usable here because the
      # task body is a proc, not a method.
      next
    end

    failed_pids = []

    bag_zip_files.each do |zip_file|
      pid = File.basename(zip_file, File.extname(zip_file))

      # Rescue per PID so that `pid` is in scope for the error report and one
      # bad record does not stop the remaining deletions.
      begin
        puts "Deleting work #{pid}"

        # Only look up the workflow entity when the work exists: without a
        # work there is no global id, and querying with a nil id could match
        # an unrelated row.
        if ActiveFedora::Base.exists?(pid)
          work = ActiveFedora::Base.find(pid)
          gid = work.to_global_id.to_s
          Sipity::Entity.find_by(proxy_for_global_id: gid)&.delete
          work.delete
        end

        ActiveFedora::Base.eradicate(pid)
        Hyrax::Migrator::Work.find_by_pid(pid)&.delete
        logger.info("Successfully deleted #{pid}")
      rescue StandardError => e
        failed_pids << pid
        puts "Unable to delete #{pid}"
        puts "Error: #{e.message}"
        puts e.backtrace
        logger.error "Unable to delete #{pid}: #{e.message}: #{e.backtrace}"
      end
    end

    puts "Failed to delete: #{failed_pids.join(', ')}" unless failed_pids.empty?
    puts 'Done'
  end
end
Run task in the workers container
bundle exec rake migration:bulk_delete