Skip to content

Commit

Permalink
feat: dataverse export (#909)
Browse files Browse the repository at this point in the history
* fix: use correct file name when importing

* feat: export to dataverse

* refactor: use Renku exceptions for Zenodo
  • Loading branch information
m-alisafaee authored Jan 28, 2020
1 parent 3879124 commit 7e9e647
Show file tree
Hide file tree
Showing 12 changed files with 505 additions and 58 deletions.
11 changes: 11 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,6 +363,17 @@ def zenodo_sandbox(client):
)


@pytest.fixture
def dataverse_demo(client):
    """Set up the client for the Dataverse demo server.

    Stores the demo access token and server URL under the ``dataverse``
    configuration section, then stages and commits ``.renku/renku.ini``.
    """
    section = 'dataverse'
    demo_settings = (
        ('access_token', '4ca13597-cf43-4815-8763-b64994058c19'),
        ('server_url', 'https://demo.dataverse.org'),
    )
    for key, value in demo_settings:
        client.set_value(section, key, value)

    # Commit the updated configuration file to the repository.
    repo = client.repo
    repo.git.add('.renku/renku.ini')
    repo.index.commit('renku.ini')


@pytest.fixture
def doi_responses():
"""Responses for doi.org requests."""
Expand Down
27 changes: 20 additions & 7 deletions renku/cli/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,11 @@
exported. The remote version will be set to the local tag that is being
exported.
To export to a Dataverse provider you must pass the Dataverse server's URL
and the name of the parent dataverse to which the dataset will be exported.
The server's URL is stored in your Renku settings, so you don't need to
pass it every time.
Listing all files in the project associated with a dataset.
Expand Down Expand Up @@ -591,7 +596,7 @@ def ls_tags(short_name, format):


@dataset.command('export')
@click.argument('id')
@click.argument('short_name')
@click.argument('provider')
@click.option(
'-p',
Expand All @@ -600,16 +605,24 @@ def ls_tags(short_name, format):
help='Automatically publish exported dataset.'
)
@click.option('-t', '--tag', help='Dataset tag to export')
def export_(id, provider, publish, tag):
@click.option('--dataverse-server', default=None, help='Dataverse server URL.')
@click.option(
'--dataverse-name', default=None, help='Dataverse name to export to.'
)
def export_(
short_name, provider, publish, tag, dataverse_server, dataverse_name
):
"""Export data to 3rd party provider."""
try:
output = export_dataset(
id,
provider,
publish,
tag,
short_name=short_name,
provider=provider,
publish=publish,
tag=tag,
handle_access_token_fn=prompt_access_token,
handle_tag_selection_fn=prompt_tag_selection
handle_tag_selection_fn=prompt_tag_selection,
dataverse_server_url=dataverse_server,
dataverse_name=dataverse_name,
)
except (
ValueError, InvalidAccessToken, DatasetNotFound, requests.HTTPError
Expand Down
52 changes: 40 additions & 12 deletions renku/core/commands/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
import click
import git
import yaml
from requests import HTTPError

from renku.core import errors
from renku.core.commands.checks.migration import check_dataset_resources, \
dataset_pre_0_3
from renku.core.commands.format.dataset_tags import DATASET_TAGS_FORMATS
Expand Down Expand Up @@ -172,6 +172,7 @@ def add_to_dataset(
commit_message=None,
extract=False,
all_at_once=False,
destination_names=None,
progress=None,
):
"""Add data to a dataset."""
Expand All @@ -197,6 +198,7 @@ def add_to_dataset(
ref=ref,
extract=extract,
all_at_once=all_at_once,
destination_names=destination_names,
progress=progress,
)

Expand Down Expand Up @@ -341,13 +343,15 @@ def dataset_remove(
)
def export_dataset(
client,
id,
short_name,
provider,
publish,
tag,
handle_access_token_fn=None,
handle_tag_selection_fn=None,
commit_message=None,
dataverse_server_url=None,
dataverse_name=None,
):
"""Export data to 3rd party provider.
Expand All @@ -356,9 +360,9 @@ def export_dataset(
"""
# TODO: all these callbacks are ugly, improve in #737
config_key_secret = 'access_token'
provider_id = provider
provider_id = provider.lower()

dataset_ = client.load_dataset(id)
dataset_ = client.load_dataset(short_name)
if not dataset_:
raise DatasetNotFound()

Expand All @@ -385,7 +389,7 @@ def export_dataset(
selected_commit = tag_result.commit

with client.with_commit(selected_commit):
dataset_ = client.load_dataset(id)
dataset_ = client.load_dataset(short_name)
if not dataset_:
raise DatasetNotFound()

Expand All @@ -405,14 +409,35 @@ def export_dataset(
)
exporter.set_access_token(access_token)

try:
destination = exporter.export(publish, selected_tag)
except HTTPError as e:
if 'unauthorized' in str(e):
client.remove_value(
provider_id, config_key_secret, global_only=True
if provider_id == 'dataverse':
if not dataverse_name:
raise errors.ParameterError('Dataverse name is required.')

CONFIG_BASE_URL = 'server_url'

if not dataverse_server_url:
dataverse_server_url = client.get_value(
provider_id, CONFIG_BASE_URL
)
else:
client.set_value(
provider_id,
CONFIG_BASE_URL,
dataverse_server_url,
global_only=True
)

try:
destination = exporter.export(
publish=publish,
tag=selected_tag,
server_url=dataverse_server_url,
dataverse_name=dataverse_name
)
except errors.AuthenticationError:
client.remove_value(
provider_id, config_key_secret, global_only=True
)
raise

result = 'Exported to: {0}'.format(destination)
Expand Down Expand Up @@ -484,15 +509,18 @@ def import_dataset(

dataset.url = remove_credentials(dataset.url)

urls, names = zip(*[(f.url, f.filename) for f in files])

add_to_dataset(
client,
urls=[f.url for f in files],
urls=urls,
short_name=short_name,
create=True,
with_metadata=dataset,
force=True,
extract=extract,
all_at_once=True,
destination_names=names,
progress=progress,
)

Expand Down
2 changes: 1 addition & 1 deletion renku/core/commands/providers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,4 @@ def from_uri(uri):
@staticmethod
def from_id(provider_id):
"""Get provider type based on identifier."""
return ProviderFactory.PROVIDERS[provider_id]()
return ProviderFactory.PROVIDERS[provider_id.lower()]()
4 changes: 2 additions & 2 deletions renku/core/commands/providers/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def find_record(self, uri):
pass

@abc.abstractmethod
def get_exporter(self, dataset, secret):
def get_exporter(self, dataset, access_token):
"""Get export manager."""
pass

Expand All @@ -51,6 +51,6 @@ def access_token_url(self):
pass

@abc.abstractmethod
def export(self, publish):
def export(self, publish, **kwargs):
"""Execute export process."""
pass
Loading

0 comments on commit 7e9e647

Please sign in to comment.