Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix data access and redirects #182

Merged
merged 3 commits into from
Apr 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions src/pyDataverse/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def get_request(self, url, params=None, auth=False):

try:
url = urljoin(self.base_url_api, url)
resp = httpx.get(url, params=params)
resp = httpx.get(url, params=params, follow_redirects=True)
if resp.status_code == 401:
error_msg = resp.json()["message"]
raise ApiAuthorizationError(
Expand Down Expand Up @@ -175,7 +175,7 @@ def post_request(self, url, data=None, auth=False, params=None, files=None):
params["key"] = self.api_token

try:
resp = httpx.post(url, data=data, params=params, files=files)
resp = httpx.post(url, data=data, params=params, files=files, follow_redirects=True)
if resp.status_code == 401:
error_msg = resp.json()["message"]
raise ApiAuthorizationError(
Expand Down Expand Up @@ -216,7 +216,7 @@ def put_request(self, url, data=None, auth=False, params=None):
params["key"] = self.api_token

try:
resp = httpx.put(url, data=data, params=params)
resp = httpx.put(url, data=data, params=params, follow_redirects=True)
if resp.status_code == 401:
error_msg = resp.json()["message"]
raise ApiAuthorizationError(
Expand Down Expand Up @@ -255,7 +255,7 @@ def delete_request(self, url, auth=False, params=None):
params["key"] = self.api_token

try:
return httpx.delete(url, params=params)
return httpx.delete(url, params=params, follow_redirects=True)
except ConnectError:
raise ConnectError(
"ERROR: DELETE could not establish connection to api {}.".format(url)
Expand Down Expand Up @@ -338,13 +338,13 @@ def get_datafile(
"""
is_first_param = True
if is_pid:
url = "{0}/datafile/{1}".format(self.base_url_api_data_access, identifier)
if data_format or no_var_header or image_thumb:
url += "?"
else:
url = "{0}/datafile/:persistentId/?persistentId={1}".format(
self.base_url_api_data_access, identifier
)
else:
url = "{0}/datafile/{1}".format(self.base_url_api_data_access, identifier)
if data_format or no_var_header or image_thumb:
url += "?"
if data_format:
url += "format={0}".format(data_format)
is_first_param = False
Expand Down
146 changes: 146 additions & 0 deletions tests/api/test_access.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import os
import json
import httpx

from pyDataverse.api import DataAccessApi, NativeApi

class TestDataAccess:
    """Integration tests for ``DataAccessApi.get_datafile``.

    Both tests perform real HTTP requests: the first against the Dataverse
    instance named by the ``BASE_URL`` / ``API_TOKEN`` environment variables,
    the second against ``https://dataverse.harvard.edu``.
    """

    def test_get_data_by_id(self):
        """Tests getting a data file by its numeric file ID."""

        # Arrange
        BASE_URL = os.getenv("BASE_URL")
        API_TOKEN = os.getenv("API_TOKEN")

        # Validate before touching the values, so a missing variable fails
        # with the readable message instead of an AttributeError on None.
        assert BASE_URL is not None, "BASE_URL is not set"
        assert API_TOKEN is not None, "API_TOKEN is not set"

        BASE_URL = BASE_URL.rstrip("/")

        # Create dataset
        with open("tests/data/file_upload_ds_minimum.json") as metadata_file:
            metadata = json.load(metadata_file)
        pid = self._create_dataset(BASE_URL, API_TOKEN, metadata)
        api = DataAccessApi(BASE_URL, API_TOKEN)

        # Upload a file
        self._upload_datafile(BASE_URL, API_TOKEN, pid)

        # Retrieve the file ID
        file_id = self._get_file_id(BASE_URL, API_TOKEN, pid)

        # Act
        response = api.get_datafile(file_id, is_pid=False)
        response.raise_for_status()
        content = response.content.decode("utf-8")

        # Assert
        # Read the fixture as UTF-8 to mirror the decoding of the response.
        with open("tests/data/datafile.txt", encoding="utf-8") as expected_file:
            expected = expected_file.read()
        assert content == expected, "Data retrieval failed."

    def test_get_data_by_pid(self):
        """Tests getting a data file by its persistent identifier (PID).

        Test runs with a PID instead of a file ID, against the Harvard
        Dataverse. No PID is given to files when run within local containers.

        TODO - Check if possible with containers
        """

        # Arrange
        BASE_URL = "https://dataverse.harvard.edu"
        pid = "doi:10.7910/DVN/26093/IGA4JD"
        api = DataAccessApi(BASE_URL)

        # Act
        response = api.get_datafile(pid, is_pid=True)
        response.raise_for_status()
        content = response.content

        # Assert
        # The reference bytes are fetched directly with httpx, bypassing
        # DataAccessApi, so both code paths must yield the same payload.
        expected = self._get_file_content(BASE_URL, pid)
        assert content == expected, "Data retrieval failed."

    @staticmethod
    def _create_dataset(
        BASE_URL: str,
        API_TOKEN: str,
        metadata: dict,
    ):
        """
        Create a dataset in the Dataverse.

        Args:
            BASE_URL (str): The base URL of the Dataverse instance.
            API_TOKEN (str): The API token for authentication.
            metadata (dict): The metadata for the dataset.

        Returns:
            str: The persistent identifier (PID) of the created dataset.

        Raises:
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        """
        url = f"{BASE_URL}/api/dataverses/root/datasets"
        response = httpx.post(
            url=url,
            json=metadata,
            headers={
                "X-Dataverse-key": API_TOKEN,
                "Content-Type": "application/json",
            },
        )

        response.raise_for_status()

        return response.json()["data"]["persistentId"]

    @staticmethod
    def _get_file_id(
        BASE_URL: str,
        API_TOKEN: str,
        pid: str,
    ):
        """Retrieves a file ID for a given persistent identifier (PID) in Dataverse.

        Args:
            BASE_URL (str): The base URL of the Dataverse instance.
            API_TOKEN (str): The API token for authentication.
            pid (str): The persistent identifier of the dataset.

        Returns:
            The ``id`` of the first file listed in the dataset's latest version.

        Raises:
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        """

        response = httpx.get(
            url=f"{BASE_URL}/api/datasets/:persistentId/?persistentId={pid}",
            headers={
                "X-Dataverse-key": API_TOKEN,
                "Content-Type": "application/json",
            },
        )

        response.raise_for_status()

        return response.json()["data"]["latestVersion"]["files"][0]["dataFile"]["id"]

    @staticmethod
    def _upload_datafile(
        BASE_URL: str,
        API_TOKEN: str,
        pid: str,
    ):
        """Uploads ``tests/data/datafile.txt`` to the dataset identified by ``pid``.

        Args:
            BASE_URL (str): The base URL of the Dataverse instance.
            API_TOKEN (str): The API token for authentication.
            pid (str): The persistent identifier of the target dataset.

        Raises:
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        """

        url = f"{BASE_URL}/api/datasets/:persistentId/add?persistentId={pid}"

        # Keep the file open only for the duration of the request so the
        # handle is closed even when the upload fails.
        with open("tests/data/datafile.txt", "rb") as datafile:
            response = httpx.post(
                url=url,
                files={"file": datafile},
                headers={
                    "X-Dataverse-key": API_TOKEN,
                },
            )

        response.raise_for_status()

    @staticmethod
    def _get_file_content(
        BASE_URL: str,
        pid: str,
    ):
        """Retrieves the file content for testing purposes.

        Args:
            BASE_URL (str): The base URL of the Dataverse instance.
            pid (str): The persistent identifier of the data file.

        Returns:
            bytes: The raw body of the response, after following redirects.

        Raises:
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        """

        response = httpx.get(
            url=f"{BASE_URL}/api/access/datafile/:persistentId/?persistentId={pid}",
            follow_redirects=True,
        )

        response.raise_for_status()

        return response.content
Loading