Skip to content

Commit

Permalink
Extend test for ges_disc
Browse files Browse the repository at this point in the history
  • Loading branch information
ghiggi committed Feb 8, 2024
1 parent 149b8b9 commit 7a7acad
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 25 deletions.
2 changes: 0 additions & 2 deletions gpm_api/io/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
@author: ghiggi
"""
import datetime
import functools
import os
import subprocess

Expand Down Expand Up @@ -91,7 +90,6 @@ def check_remote_storage(storage):
return storage.lower()


@functools.lru_cache(maxsize=None)
def check_transfer_tool(transfer_tool):
"""Check the transfer tool."""
valid_transfer_tools = ["curl", "wget"]
Expand Down
4 changes: 2 additions & 2 deletions gpm_api/io/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
check_filepaths_integrity,
)
from gpm_api.io.find import find_daily_filepaths
from gpm_api.io.ges_disc import define_gesdisc_filepath
from gpm_api.io.ges_disc import define_ges_disc_filepath
from gpm_api.io.info import get_info_from_filepath
from gpm_api.io.local import define_local_filepath
from gpm_api.io.pps import define_pps_filepath
Expand Down Expand Up @@ -374,7 +374,7 @@ def _get_func_filepath_definition(storage):
dict_fun = {
"local": define_local_filepath,
"pps": define_pps_filepath,
"ges_disc": define_gesdisc_filepath,
"ges_disc": define_ges_disc_filepath,
}
func = dict_fun[storage]
return func
Expand Down
4 changes: 2 additions & 2 deletions gpm_api/io/find.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
check_valid_time_request,
)
from gpm_api.io.filter import filter_filepaths
from gpm_api.io.ges_disc import get_gesdisc_daily_filepaths
from gpm_api.io.ges_disc import get_ges_disc_daily_filepaths
from gpm_api.io.info import get_version_from_filepaths
from gpm_api.io.local import get_local_daily_filepaths
from gpm_api.io.pps import get_pps_daily_filepaths
Expand Down Expand Up @@ -55,7 +55,7 @@ def _get_all_daily_filepaths(storage, date, product, product_type, version, verb
verbose=verbose,
)
elif storage == "ges_disc":
filepaths = get_gesdisc_daily_filepaths(
filepaths = get_ges_disc_daily_filepaths(
product=product,
product_type=product_type,
date=date,
Expand Down
36 changes: 17 additions & 19 deletions gpm_api/io/ges_disc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,16 @@


def _get_ges_disc_url_content(url):
cmd = f"wget -O - {url}"
# cmd = f"wget -O - {url}"
cmd = f"curl -L {url}"

Check warning on line 21 in gpm_api/io/ges_disc.py

View check run for this annotation

Codecov / codecov/patch

gpm_api/io/ges_disc.py#L21

Added line #L21 was not covered by tests
args = cmd.split()
process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout = process.communicate()[0].decode()
# Check if server is available
if stdout == "":
raise ValueError(f"The requested url {url} was not found on the GES DISC server.")
if "The requested URL was not found on this server" in stdout:
raise ValueError(f"The requested url {url} was not found on the GES DISC server.")

Check warning on line 29 in gpm_api/io/ges_disc.py

View check run for this annotation

Codecov / codecov/patch

gpm_api/io/ges_disc.py#L28-L29

Added lines #L28 - L29 were not covered by tests
return stdout


Expand All @@ -44,24 +47,16 @@ def _get_href_value(input_string):
def _get_ges_disc_list_path(url):
# Retrieve url content
# - If it returns something, means url is correct
wget_output = _get_ges_disc_url_content(url)
output = _get_ges_disc_url_content(url)

Check warning on line 50 in gpm_api/io/ges_disc.py

View check run for this annotation

Codecov / codecov/patch

gpm_api/io/ges_disc.py#L50

Added line #L50 was not covered by tests
# Retrieve content
list_content = [_get_href_value(s) for s in wget_output.split("alt=")[4:]]
list_content = [_get_href_value(s) for s in output.split("alt=")[4:]]

Check warning on line 52 in gpm_api/io/ges_disc.py

View check run for this annotation

Codecov / codecov/patch

gpm_api/io/ges_disc.py#L52

Added line #L52 was not covered by tests
list_content = [s for s in list_content if s != ""]
if len(list_content) == 0:
raise ValueError(f"The GES DISC {url} directory is empty.")
list_path = [f"{url}/{s}" for s in list_content]
return list_path


# # Empty directory
# url = "https://gpm2.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHHE.07/"
# url = "https://gpm2.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHHE.07"

# # Unexisting directory
# url = "https://gpm2.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHHE.07/2020"


####--------------------------------------------------------------------------.
#####################
#### Directories ####
Expand Down Expand Up @@ -159,7 +154,7 @@ def get_ges_disc_product_directory(product, date, version):
############################


def _get_gesdisc_file_list(url_product_dir, product, date, version, verbose=True):
def _get_ges_disc_file_list(url_product_dir, product, date, version, verbose=True):

Check notice on line 157 in gpm_api/io/ges_disc.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

✅ No longer an issue: Excess Number of Function Arguments

_get_gesdisc_file_list is no longer above the threshold for number of arguments. This function has too many arguments, indicating a lack of encapsulation. Avoid adding more arguments.

Check notice on line 157 in gpm_api/io/ges_disc.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

ℹ New issue: Excess Number of Function Arguments

_get_ges_disc_file_list has 5 arguments, threshold = 4. This function has too many arguments, indicating a lack of encapsulation. Avoid adding more arguments.
"""
Retrieve NASA GES DISC filepaths for a specific day and product.
Expand Down Expand Up @@ -194,7 +189,12 @@ def _get_gesdisc_file_list(url_product_dir, product, date, version, verbose=True
return filepaths


def get_gesdisc_daily_filepaths(product, product_type, date, version, verbose=True):
def _check_gesc_disc_product_type(product, product_type):
if product_type == "NRT" and "IMERG" not in product:
raise ValueError("The only available NRT products on GES DISC are IMERG-ER and IMERG-FR")

Check warning on line 194 in gpm_api/io/ges_disc.py

View check run for this annotation

Codecov / codecov/patch

gpm_api/io/ges_disc.py#L194

Added line #L194 was not covered by tests


def get_ges_disc_daily_filepaths(product, product_type, date, version, verbose=True):
"""
Retrieve the NASA GES DISC file paths available at a given date.
Expand All @@ -212,13 +212,12 @@ def get_gesdisc_daily_filepaths(product, product_type, date, version, verbose=Tr
Whether to specify when data are not available for a specific date.
The default is True.
"""
if product_type == "NRT" and "IMERG" not in product:
raise ValueError("The only available NRT products on GES DISC are IMERG-ER and IMERG-FR")
_check_gesc_disc_product_type(product=product, product_type=product_type)
# Retrieve server urls of NASA GES DISC
url_product_dir = get_ges_disc_product_directory(product=product, date=date, version=version)
# Retrieve GES DISC filepaths
# - If empty: return []
filepaths = _get_gesdisc_file_list(
filepaths = _get_ges_disc_file_list(

Check notice on line 220 in gpm_api/io/ges_disc.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

✅ No longer an issue: Excess Number of Function Arguments

get_gesdisc_daily_filepaths is no longer above the threshold for number of arguments. This function has too many arguments, indicating a lack of encapsulation. Avoid adding more arguments.

Check notice on line 220 in gpm_api/io/ges_disc.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

ℹ New issue: Excess Number of Function Arguments

get_ges_disc_daily_filepaths has 5 arguments, threshold = 4. This function has too many arguments, indicating a lack of encapsulation. Avoid adding more arguments.
url_product_dir=url_product_dir,
product=product,
date=date,
Expand All @@ -228,7 +227,7 @@ def get_gesdisc_daily_filepaths(product, product_type, date, version, verbose=Tr
return filepaths


def define_gesdisc_filepath(product, product_type, date, version, filename):
def define_ges_disc_filepath(product, product_type, date, version, filename):
"""Define GES DISC filepath from filename.
Parameters
Expand All @@ -244,8 +243,7 @@ def define_gesdisc_filepath(product, product_type, date, version, filename):
filename : str
Name of the GPM file.
"""
if product_type == "NRT" and "IMERG" not in product:
raise ValueError("The only available NRT products on GES DISC are IMERG-ER and IMERG-FR")
_check_gesc_disc_product_type(product=product, product_type=product_type)

Check notice on line 246 in gpm_api/io/ges_disc.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

✅ No longer an issue: Excess Number of Function Arguments

define_gesdisc_filepath is no longer above the threshold for number of arguments. This function has too many arguments, indicating a lack of encapsulation. Avoid adding more arguments.

Check notice on line 246 in gpm_api/io/ges_disc.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

ℹ New issue: Excess Number of Function Arguments

define_ges_disc_filepath has 5 arguments, threshold = 4. This function has too many arguments, indicating a lack of encapsulation. Avoid adding more arguments.
# Retrieve product directory url
url_product_dir = get_ges_disc_product_directory(product=product, date=date, version=version)
# Define GES DISC filepath
Expand Down
141 changes: 141 additions & 0 deletions gpm_api/tests/test_io/test_ges_disc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 8 18:03:15 2024
@author: ghiggi
"""
import pytest
import datetime
from pytest_mock.plugin import MockerFixture
from gpm_api.io import ges_disc


def _test_get_ges_disc_list_path():
    """Test _get_ges_disc_list_path without mocking the url retrieval.

    The leading underscore keeps this function outside pytest's default
    ``test_*`` collection pattern. Presumably this is deliberate because the
    cases below call the real helper on real URLs (TODO confirm).
    """
    # Empty directory
    url = "https://gpm2.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHHE.07/"
    with pytest.raises(ValueError) as excinfo:
        ges_disc._get_ges_disc_list_path(url)
    # The exception is inspected after the context exits, so the assertion runs
    assert "directory is empty" in str(excinfo.value)

    # Year directory
    url = "https://gpm2.gesdisc.eosdis.nasa.gov/data/GPM_L2/GPM_2ADPR.07/"
    list_path = ges_disc._get_ges_disc_list_path(url)
    assert len(list_path) > 0

    # File directory
    url = "https://gpm2.gesdisc.eosdis.nasa.gov/data/GPM_L2/GPM_2ADPR.07/2019/006/"
    list_path = ges_disc._get_ges_disc_list_path(url)
    assert len(list_path) > 0

    # Wrong URL
    url = "BAD_URL"
    with pytest.raises(ValueError) as excinfo:
        ges_disc._get_ges_disc_list_path(url)
    assert f"The requested url {url} was not found on the GES DISC server." == str(excinfo.value)

    # Nonexistent directory
    url = "https://gpm2.gesdisc.eosdis.nasa.gov/data/GPM_L3/GPM_3IMERGHHE.07/20020"
    with pytest.raises(ValueError) as excinfo:
        ges_disc._get_ges_disc_list_path(url)
    assert f"The requested url {url} was not found on the GES DISC server." == str(excinfo.value)


def _test_define_ges_disc_filepath():
    """Test define_ges_disc_filepath for an invalid and a valid request."""
    # A NRT request for a non-IMERG product must raise a ValueError
    invalid_request = {
        "product": "2A-DPR",
        "product_type": "NRT",
        "date": "DUMMY",
        "version": 1,
        "filename": "BIDDIBIBBODIBIBU",
    }
    with pytest.raises(ValueError):
        ges_disc.define_ges_disc_filepath(**invalid_request)

    # A valid RS request must return the full url of the file
    filename = "2A.GPM.DPR.V9-20211125.20190106-S020627-E033859.027589.V07A.HDF5"
    expected_url = "https://gpm2.gesdisc.eosdis.nasa.gov/data/GPM_L2/GPM_2ADPR.07/2019/006/2A.GPM.DPR.V9-20211125.20190106-S020627-E033859.027589.V07A.HDF5"
    url = ges_disc.define_ges_disc_filepath(
        product="2A-DPR",
        product_type="RS",
        date=datetime.date(2019, 1, 6),
        version=7,
        filename=filename,
    )
    assert url == expected_url


class TestGESDISCFileList:
    """Test ges_disc._get_ges_disc_file_list with _get_ges_disc_list_path mocked."""

    # Inputs shared by the tests that use the example.com product directory
    _URL = "http://example.com/products/"
    _DATE = datetime.date(2020, 7, 5)
    _PRODUCT = "2A-DPR"
    _VERSION = "07"

    def test_success(self, mocker: MockerFixture):
        """The file paths returned by the mocked listing are passed through."""
        expected_filepaths = ["https://example.com/file1.hdf5", "https://example.com/file2.hdf5"]
        mocker.patch.object(
            ges_disc, "_get_ges_disc_list_path", autospec=True, return_value=expected_filepaths
        )

        request = (self._URL, self._PRODUCT, self._DATE, self._VERSION)
        result = ges_disc._get_ges_disc_file_list(*request, verbose=True)
        assert result == expected_filepaths, "File paths do not match expected output"

    def test_not_found_error(self, mocker: MockerFixture):
        """A 'not found on the GES DISC server' error is propagated to the caller."""
        mocker.patch.object(
            ges_disc,
            "_get_ges_disc_list_path",
            side_effect=Exception("was not found on the GES DISC server"),
        )

        request = ("http://wrong.url/", self._PRODUCT, self._DATE, self._VERSION)
        with pytest.raises(Exception) as excinfo:
            ges_disc._get_ges_disc_file_list(*request)
        assert "was not found on the GES DISC server" in str(
            excinfo.value
        ), "Expected exception not raised for server not found"

    def test_no_data_verbose(self, mocker: MockerFixture, capsys):
        """Any other listing error gives an empty list and a printed message."""
        mocker.patch.object(
            ges_disc,
            "_get_ges_disc_list_path",
            autospec=True,
            side_effect=Exception("some other error"),
        )

        request = (self._URL, self._PRODUCT, self._DATE, self._VERSION)
        result = ges_disc._get_ges_disc_file_list(*request, verbose=True)
        assert result == [], "Expected empty list for no data found"

        printed = capsys.readouterr().out
        assert "No data found on GES DISC" in printed, "Expected verbose message not printed"

    def test_no_data_not_verbose(self, mocker: MockerFixture, capsys):
        """Any other listing error gives an empty list and no output if not verbose."""
        mocker.patch.object(
            ges_disc,
            "_get_ges_disc_list_path",
            autospec=True,
            side_effect=Exception("some other error"),
        )

        request = (self._URL, self._PRODUCT, self._DATE, self._VERSION)
        result = ges_disc._get_ges_disc_file_list(*request, verbose=False)
        assert result == [], "Expected empty list for no data found"

        printed = capsys.readouterr().out
        assert printed == "", "No output expected when verbose is False"

0 comments on commit 7a7acad

Please sign in to comment.