Skip to content

Commit

Permalink
add debugging
Browse files Browse the repository at this point in the history
  • Loading branch information
pdurbin committed Dec 12, 2024
1 parent 3977884 commit 1e0d726
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 0 deletions.
13 changes: 13 additions & 0 deletions repo2docker/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,18 @@ def fetch(self, url, ref, checkout_path):
picked_content_provider = None
for ContentProvider in self.content_providers:
cp = ContentProvider()
self.log.info(f"DEBUG: content provider is {cp.__class__.__name__}...")
if "Local" in {cp.__class__.__name__}:
print("skipping")
continue
if "Zenodo" in {cp.__class__.__name__}:
print("skipping")
continue
if "Figshare" in {cp.__class__.__name__}:
print("skipping")
continue
print("")
self.log.info(f"DEBUG: About to detect. URL is {url}. Trying content provider {cp.__class__.__name__}\n")
spec = cp.detect(url, ref=ref)
if spec is not None:
picked_content_provider = cp
Expand All @@ -504,6 +516,7 @@ def fetch(self, url, ref, checkout_path):
if swh_token and isinstance(picked_content_provider, contentproviders.Swhid):
picked_content_provider.set_auth_token(swh_token)

print("about to fetch...")
for log_line in picked_content_provider.fetch(
spec, checkout_path, yield_output=self.json_logs
):
Expand Down
13 changes: 13 additions & 0 deletions repo2docker/contentproviders/dataverse.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,13 @@ def detect(self, doi, ref=None, extra_args=None):
- doi:10.7910/DVN/6ZXAGT/3YRRYJ
"""
self.log.debug(f"DEBUG In Dataverse detect(). DOI is {doi}\n")
url = self.doi2url(doi)
# Parse the url, to get the base for later API calls
parsed_url = urlparse(url)
self.log.info(f"DEBUG: ")
self.log.info(f"DEBUG: parsed_url: {parsed_url}")
# exit(1)

# Check if the url matches any known Dataverse installation, bail if not.
host = next(
Expand All @@ -53,9 +57,11 @@ def detect(self, doi, ref=None, extra_args=None):
if host is None:
return

# exit(1)
query_args = parse_qs(parsed_url.query)
# Corner case handling
if parsed_url.path.startswith("/file.xhtml"):
print("got here 1")
# There's no way of getting file information using its persistentId; the only thing we can do is assume that the DOI
# is structured as "doi:<dataset_doi>/<file_doi>" and try to handle the dataset that way.
new_doi = doi.rsplit("/", 1)[0]
Expand All @@ -64,6 +70,7 @@ def detect(self, doi, ref=None, extra_args=None):
return
return self.detect(new_doi)
elif parsed_url.path.startswith("/api/access/datafile"):
print("got here 2")
# Raw url pointing to a datafile is a typical output from an External Tool integration
entity_id = os.path.basename(parsed_url.path)
search_query = "q=entityId:" + entity_id + "&type=file"
Expand All @@ -84,7 +91,12 @@ def detect(self, doi, ref=None, extra_args=None):
parsed_url.path.startswith("/dataset.xhtml")
and "persistentId" in query_args
):
print()
print("got here 3, for example from this: jupyter-repo2docker doi:10.7910/DVN/TJCLKP")
self.record_id = deep_get(query_args, "persistentId.0")
print("self.record_id BEGIN")
print(self.record_id)
print("self.record_id END")

if hasattr(self, "record_id"):
return {"record": self.record_id, "host": host}
Expand All @@ -96,6 +108,7 @@ def fetch(self, spec, output_dir, yield_output=False):

yield f"Fetching Dataverse record {record_id}.\n"
url = f'{host["url"]}/api/datasets/:persistentId?persistentId={record_id}'
print(f'Downloading from {url}')

resp = self.urlopen(url, headers={"accept": "application/json"})
record = resp.json()["data"]
Expand Down
2 changes: 2 additions & 0 deletions repo2docker/contentproviders/doi.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def doi2url(self, doi):
doi = normalize_doi(doi)

try:
self.log.info(f"DEBUG: About to request DOI: {doi}\n")
resp = self._request(f"https://doi.org/{doi}")
resp.raise_for_status()
except HTTPError as e:
Expand All @@ -60,6 +61,7 @@ def doi2url(self, doi):
# default Git provider as this leads to a misleading error.
logging.error(f"DOI {doi} does not resolve: {e}")
raise
self.log.info(f"DEBUG: returning URL {resp.url}\n")
return resp.url
else:
# Just return what is actually just a URL
Expand Down
5 changes: 5 additions & 0 deletions repo2docker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,10 @@ def deep_get(dikt, path):
deep_get(data, "data.files.0") == data["data"]["files"][0]
"""
print("dikt BEGIN")
print(dikt)
print("dikt END")
print("path: " + path)
value = dikt
for component in path.split("."):
if component.isdigit():
Expand Down Expand Up @@ -471,6 +475,7 @@ def normalize_doi(val):
"""Return just the DOI (e.g. 10.1234/jshd123)
from a val that could include a url or doi
(e.g. https://doi.org/10.1234/jshd123)"""
print("calling normalize_doi()...")
m = doi_regexp.match(val)
return m.group(2)

Expand Down

0 comments on commit 1e0d726

Please sign in to comment.