diff --git a/gpm_api/io/checks.py b/gpm_api/io/checks.py index 31c1b17e..d4704145 100644 --- a/gpm_api/io/checks.py +++ b/gpm_api/io/checks.py @@ -32,7 +32,7 @@ def check_base_dir(base_dir): Base directory where the GPM directory is located. """ # Check base_dir does not end with / - if base_dir[-1] == "/": + if base_dir[-1] == os.path.sep: base_dir = base_dir[0:-1] # Retrieve last folder name dir_name = os.path.basename(base_dir) @@ -133,7 +133,7 @@ def check_product_validity(product, product_type=None): def check_time(time): """Check time validity. - It returns a datetime.datetime object. + It returns a datetime.datetime object to seconds precision. Parameters ---------- @@ -145,7 +145,7 @@ def check_time(time): Returns ------- time : datetime.datetime - datetime.datetime object. + datetime.datetime object """ if not isinstance(time, (datetime.datetime, datetime.date, np.datetime64, np.ndarray, str)): @@ -174,15 +174,25 @@ def check_time(time): time = datetime.datetime.fromisoformat(time) except ValueError: raise ValueError("The time string must have format 'YYYY-MM-DD hh:mm:ss'") + + # If datetime object carries timezone that is not UTC, raise error + if time.tzinfo is not None: + if str(time.tzinfo) != "UTC": + raise ValueError("The datetime object must be in UTC timezone if timezone is given.") + else: + # If UTC, strip timezone information + time = time.replace(tzinfo=None) + return time def check_date(date): - if not isinstance(date, (datetime.date, datetime.datetime)): - raise ValueError("date must be a datetime object") - if isinstance(date, datetime.datetime): - date = date.date() - return date + if date is None: + raise ValueError("date cannot be None") + + # Use check_time to convert to datetime.datetime + datetime_obj = check_time(date) + return datetime_obj.date() def check_start_end_time(start_time, end_time): @@ -225,25 +235,3 @@ def check_scan_mode(scan_mode, product, version): # -------------------------------------------------------------------------. return scan_mode - - -def check_bbox(bbox): - """ - Check correctness of bounding box. - bbox format: [lon_0, lon_1, lat_0, lat_1] - bbox should be provided with longitude between -180 and 180, and latitude - between -90 and 90. - """ - if bbox is None: - return bbox - # If bbox provided - if not (isinstance(bbox, list) and len(bbox) == 4): - raise ValueError("Provide valid bbox [lon_0, lon_1, lat_0, lat_1]") - if bbox[2] > 90 or bbox[2] < -90 or bbox[3] > 90 or bbox[3] < -90: - raise ValueError("Latitude is defined between -90 and 90") - # Try to be sure that longitude is specified between -180 and 180 - if bbox[0] > 180 or bbox[1] > 180: - print("bbox should be provided with longitude between -180 and 180") - bbox[0] = bbox[0] - 180 - bbox[1] = bbox[1] - 180 - return bbox diff --git a/gpm_api/io/directories.py b/gpm_api/io/directories.py index 5410cdbe..38bc81e5 100644 --- a/gpm_api/io/directories.py +++ b/gpm_api/io/directories.py @@ -44,8 +44,6 @@ def get_disk_dir_pattern(product, product_type, version): GPM product name. See: gpm_api.available_products() product_type : str, optional GPM product type. Either 'RS' (Research) or 'NRT' (Near-Real-Time). - date : datetime.date - Single date for which to retrieve the data. version : int, optional GPM version of the data to retrieve if product_type = 'RS'. diff --git a/gpm_api/io/download.py b/gpm_api/io/download.py index bb40b62d..4e91f3fd 100644 --- a/gpm_api/io/download.py +++ b/gpm_api/io/download.py @@ -71,7 +71,7 @@ def curl_cmd(server_path, disk_path, username, password): # -------------------------------------------------------------------------. # Replace ftps with ftp to make curl work !!! # - curl expects ftp:// and not ftps:// - server_path = server_path.replace("ftps", "ftp", 1) + server_path = server_path.replace("ftps://", "ftp://", 1) # -------------------------------------------------------------------------. ## Define command to run # Base command: curl -4 --ftp-ssl --user [user name]:[password] -n [url] @@ -258,7 +258,6 @@ def _download_files( progress_bar=True, verbose=False, ): - if transfer_tool == "curl": list_cmd = [ curl_cmd(src_path, dst_path, username, username) @@ -909,6 +908,14 @@ def _check_download_status(status, product, verbose): def flatten_list(nested_list): """Flatten a nested list into a single-level list.""" + + # If list is already flat, return as is to avoid flattening to chars + if ( + isinstance(nested_list, list) + and len(nested_list) == 1 + and not isinstance(nested_list[0], list) + ): + return nested_list return ( [item for sublist in nested_list for item in sublist] if isinstance(nested_list, list) diff --git a/gpm_api/io/filter.py b/gpm_api/io/filter.py index d9f20fc6..e8a7125d 100644 --- a/gpm_api/io/filter.py +++ b/gpm_api/io/filter.py @@ -234,11 +234,10 @@ def filter_by_time(filepaths, start_time=None, end_time=None): ---------- filepaths : list List of filepaths. - start_time : datetime.datetime - Start time + Start time. Will be set to GPM start mission time (1998-01-01) if None. end_time : datetime.datetime - End time. + End time. Will be set to current time (`datetime.datetime.utcnow()`) if None. Returns ---------- @@ -260,7 +259,7 @@ def filter_by_time(filepaths, start_time=None, end_time=None): if start_time is None: start_time = datetime.datetime(1998, 1, 1, 0, 0, 0) # GPM start mission if end_time is None: - end_time = datetime.datetime.now() # Current time + end_time = datetime.datetime.utcnow() # Current time start_time, end_time = check_start_end_time(start_time, end_time) # -------------------------------------------------------------------------. diff --git a/gpm_api/tests/__init__.py b/gpm_api/tests/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/gpm_api/tests/conftest.py b/gpm_api/tests/conftest.py new file mode 100644 index 00000000..8ac8b0c5 --- /dev/null +++ b/gpm_api/tests/conftest.py @@ -0,0 +1,302 @@ +import pytest +import datetime +from typing import Any, List, Dict, Tuple +from gpm_api.io.products import get_info_dict, available_products +import posixpath as pxp +import ntpath as ntp +import gpm_api.configs +from unittest.mock import patch + + +@pytest.fixture(scope="session", autouse=True) +def mock_configuration(): + """Patch the user configuration for entire session + + Doing this will retrieve the configuration from pytest memory and not + alter the local configuration in ~/.config_gpm_api.yml + """ + + mocked_configuration = { + "gpm_username": "testuser", + "gpm_password": "testuser", + "gpm_base_dir": "data", + } + + with patch.object( + gpm_api.configs, + "read_gpm_api_configs", + return_value=mocked_configuration, + ): + yield + + +@pytest.fixture +def product_types() -> List[str]: + """Return a list of all product types from the info dict""" + product_types = [] + for product, info_dict in get_info_dict().items(): + product_types += info_dict["product_types"] + + product_types = list(set(product_types)) # Dedup list + + return product_types + + +@pytest.fixture +def product_categories() -> List[str]: + """Return a list of product categories from the info dict""" + + return list(set([info_dict["product_category"] for info_dict in get_info_dict().values()])) + + +@pytest.fixture +def product_levels() -> List[str]: + """Return a list of product levels from the info dict""" + + # Available in gpm_api.io.checks.check_product_level() + return ["1A", "1B", "1C", "2A", "2B"] + + +@pytest.fixture +def versions() -> List[int]: + """Return a list of versions""" + + # Available in gpm_api.io.checks.check_version() + return [4, 5, 6, 7] + + +@pytest.fixture +def products() -> List[str]: + """Return a list of all products regardless of type""" + + return available_products() + + +@pytest.fixture +def server_paths() -> Dict[str, Dict[str, Any]]: + """Return a list of probable GPM server paths""" + + # Not validated to be real paths but follow the structure + return { + "ftps://arthurhouftps.pps.eosdis.nasa.gov/gpmdata/2020/07/05/radar/2A.GPM.DPR.V9-20211125.20200705-S170044-E183317.036092.V07A.HDF5": { + "year": 2020, + "month": 7, + "day": 5, + "product": "2A-DPR", + "product_category": "radar", + "product_type": "RS", + "start_time": datetime.datetime(2020, 7, 5, 17, 0, 44), + "end_time": datetime.datetime(2020, 7, 5, 18, 33, 17), + "version": 7, + }, + "ftps://arthurhouftps.pps.eosdis.nasa.gov/gpmdata/2020/07/05/radar/2A.GPM.DPR.V9-20211125.20200705-S183318-E200550.036093.V07A.HDF5": { + "year": 2020, + "month": 7, + "day": 5, + "product": "2A-DPR", + "product_category": "radar", + "product_type": "RS", + "start_time": datetime.datetime(2020, 7, 5, 18, 33, 18), + "end_time": datetime.datetime(2020, 7, 5, 20, 5, 50), + "version": 7, + }, + "ftps://arthurhouftps.pps.eosdis.nasa.gov/gpmdata/2020/07/05/radar/2A.GPM.DPR.V9-20211125.20200705-S200551-E213823.036094.V07A.HDF5": { + "year": 2020, + "month": 7, + "day": 5, + "product": "2A-DPR", + "product_category": "radar", + "product_type": "RS", + "start_time": datetime.datetime(2020, 7, 5, 20, 5, 51), + "end_time": datetime.datetime(2020, 7, 5, 21, 38, 23), + "version": 7, + }, + # Include non-ftps folders + "ftp://arthurhouftps.pps.eosdis.nasa.gov/gpmdata/2020/07/05/radar/2A.GPM.DPR.V9-20211125.20200705-S213824-E231056.036095.V07A.HDF5": { + "year": 2020, + "month": 7, + "day": 5, + "product": "2A-DPR", + "product_category": "radar", + "product_type": "RS", + "start_time": datetime.datetime(2020, 7, 5, 21, 38, 24), + "end_time": datetime.datetime(2020, 7, 5, 23, 10, 56), + "version": 7, + }, + "ftp://arthurhouftps.pps.eosdis.nasa.gov/gpmdata/2020/07/05/radar/2A.GPM.DPR.V9-20211125.20200705-S231057-E004329.036096.V07A.HDF5": { + "year": 2020, + "month": 7, + "day": 5, + "product": "2A-DPR", + "product_category": "radar", + "product_type": "RS", + "start_time": datetime.datetime(2020, 7, 5, 23, 10, 57), + "end_time": datetime.datetime(2020, 7, 6, 0, 43, 29), + "version": 7, + }, + "ftp://arthurhouftps.pps.eosdis.nasa.gov/gpmdata/2020/07/05/radar/2A.GPM.DPR.V9-20211125.20200705-S004330-E021602.036097.V07A.HDF5": { + "year": 2020, + "month": 7, + "day": 5, + "product": "2A-DPR", + "product_category": "radar", + "product_type": "RS", + "start_time": datetime.datetime(2020, 7, 5, 0, 43, 30), + "end_time": datetime.datetime(2020, 7, 5, 2, 16, 2), + "version": 7, + }, + "ftp://arthurhouftps.pps.eosdis.nasa.gov/gpmdata/2019/07/05/radar/2A.GPM.DPR.V9-20211125.20190705-S004330-E021602.036097.V07A.HDF5": { + "year": 2019, + "month": 7, + "day": 5, + "product": "2A-DPR", + "product_category": "radar", + "product_type": "RS", + "start_time": datetime.datetime(2019, 7, 5, 0, 43, 30), + "end_time": datetime.datetime(2019, 7, 5, 2, 16, 2), + "version": 7, + }, + # TODO: Add more products with varying attributes ... + } + + +@pytest.fixture +def local_filepaths() -> List[Tuple[str, ...]]: + """Returns a list of probable local filepath structures as a list""" + + return [ + ( + "data", + "GPM", + "RS", + "V05", + "PMW", + "1B-TMI", + "2014", + "07", + "01", + "1B.TRMM.TMI.Tb2017.20140701-S045751-E063013.094690.V05A.HDF5", + ), + ( + "data", + "GPM", + "RS", + "V07", + "PMW", + "1B-TMI", + "2014", + "07", + "01", + "1B.TRMM.TMI.Tb2021.20140701-S063014-E080236.094691.V07A.HDF5", + ), + ( + "data", + "GPM", + "RS", + "V07", + "PMW", + "1C-ATMS-NPP", + "2018", + "07", + "01", + "1C.NPP.ATMS.XCAL2019-V.20180701-S075948-E094117.034588.V07A.HDF5", + ), + ( + "data", + "GPM", + "RS", + "V07", + "RADAR", + "2A-TRMM-SLH", + "2014", + "07", + "01", + "2A.TRMM.PR.TRMM-SLH.20140701-S080237-E093500.094692.V07A.HDF5", + ), + ( + "data", + "GPM", + "RS", + "V07", + "RADAR", + "2A-ENV-PR", + "2014", + "07", + "01", + "2A-ENV.TRMM.PR.V9-20220125.20140701-S063014-E080236.094691.V07A.HDF5", + ), + ( + "data", + "GPM", + "RS", + "V07", + "RADAR", + "1B-PR", + "2014", + "07", + "01", + "1B.TRMM.PR.V9-20210630.20140701-S080237-E093500.094692.V07A.HDF5", + ), + ( + "data", + "GPM", + "RS", + "V07", + "RADAR", + "1B-Ku", + "2020", + "10", + "28", + "GPMCOR_KUR_2010280754_0927_037875_1BS_DUB_07A.h5", + ), + ( + "data", + "GPM", + "RS", + "V07", + "RADAR", + "2A-DPR", + "2022", + "07", + "06", + "2A.GPM.DPR.V9-20211125.20220706-S043937-E061210.047456.V07A.HDF5", + ), + ( + "data", + "GPM", + "RS", + "V07", + "CMB", + "2B-GPM-CORRA", + "2016", + "03", + "09", + "2B.GPM.DPRGMI.CORRA2022.20160309-S091322-E104552.011525.V07A.HDF5", + ), + ( + "data", + "GPM", + "RS", + "V06", + "IMERG", + "IMERG-FR", + "2020", + "02", + "01", + "3B-HHR.MS.MRG.3IMERG.20200201-S180000-E182959.1080.V06B.HDF5", + ), + ] + + +@pytest.fixture +def local_filepaths_unix(local_filepaths) -> List[str]: + """Return the local filepath list as unix paths""" + + return [pxp.join(*path) for path in local_filepaths] + + +@pytest.fixture +def local_filepaths_windows(local_filepaths) -> List[str]: + """Return the local filepath list as windows paths""" + + return [ntp.join(*path) for path in local_filepaths] diff --git a/gpm_api/tests/io/test_checks.py b/gpm_api/tests/io/test_checks.py new file mode 100644 index 00000000..d54a2719 --- /dev/null +++ b/gpm_api/tests/io/test_checks.py @@ -0,0 +1,531 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Mon Jul 17 15:41:14 2023 + +@author: ghiggi +""" + +import pytest +import datetime +import numpy as np +import os +import platform +import ntpath as ntp +import posixpath as ptp +import pytz +import pandas as pd +from typing import List +from gpm_api.io import checks +from gpm_api.io.products import available_products, available_scan_modes + + +def test_is_not_empty() -> None: + """Test is_not_empty() which always returns a boolean""" + + # Test False responses: + for obj in [None, (), {}, []]: + res = checks.is_not_empty(obj) + assert res is False, "Function returned True, expected False" + + # Test True responses: + for obj in [[1, 2, 3], (1, 2, 3), (1), [1]]: + res = checks.is_not_empty(obj) + assert res is True, "Function returned False, expected True" + + +def test_is_empty() -> None: + """Test is_empty()""" + + # Test False responses: + for obj in [[1, 2, 3], (1, 2, 3), (1), [1]]: + res = checks.is_empty(obj) + assert res is False, "Function returned True, expected False" + + # Test True responses: + for obj in [None, (), {}, []]: + res = checks.is_empty(obj) + assert res is True, "Function returned False, expected True" + + +def test_check_base_dir() -> None: + """Check path constructor for base_dir""" + + # Check text entry for Unix/Windows + if platform.system() == "Windows": + res = checks.check_base_dir("C:\\Users\\user\\gpm") + assert res == ntp.join( + "C:", os.path.sep, "Users", "user", "gpm" + ), "Windows path is not returned" + else: + res = checks.check_base_dir("/home/user/gpm") + assert res == ptp.join(ptp.sep, "home", "user", "gpm"), "Unix path is not returned" + + # Check final slash is removed + res = checks.check_base_dir(f"{os.path.join(os.path.expanduser('~'), 'gpm')}{os.path.sep}") + assert res == os.path.join(os.path.expanduser("~"), "gpm"), "Leading slash is not removed" + + # Check if GPM, it is removed + res = checks.check_base_dir(os.path.join(os.path.join(os.path.expanduser("~"), "gpm", "GPM"))) + assert res == os.path.join(os.path.join(os.path.expanduser("~"), "gpm")), "GPM is not removed" + + +def test_check_filepaths() -> None: + """Check path constructor for filepaths""" + + # Create list of unique filepaths (may not reflect real files) + filepaths = [ + os.path.join( + "home", + "user", + "gpm", + "2A.GPM.DPR.V8-20180723.20141231-S003429-E020702.004384.V06A.HDF5", + ), + os.path.join( + "home", + "user", + "gpm", + "2A.GPM.DPR.V8-20180723.20180603-S003429-E020702.004384.V06A.HDF5", + ), + ] + + res = checks.check_filepaths(filepaths) + assert res == filepaths, "List of filepaths is not returned" + + # Check if single string is converted to list + res = checks.check_filepaths(filepaths[0]) + assert res == [filepaths[0]], "String is not converted to list" + + # Check if not list or string, TypeError is raised + with pytest.raises(TypeError): + checks.check_filepaths(123) + + +def test_check_variables() -> None: + """Check variables""" + + var_list = ["precipitationCal", "precipitationUncal", "HQprecipitation"] + + # Check if None, None is returned + res = checks.check_variables(None) + assert res is None, "None is not returned" + + # Check if string, string is returned + res = checks.check_variables(var_list[0]) + assert res == [var_list[0]], "String is not returned" + + # Check if list, list is returned + res = checks.check_variables(var_list) + assert isinstance(res, np.ndarray), "Array is not returned" + + for var in var_list: + assert var in res, f"Variable '{var}' is not returned" + + # Check if numpy array, list is returned + var_list_ndarray = np.array(var_list) + res = checks.check_variables(var_list_ndarray) + assert isinstance(res, np.ndarray), "numpy array is not returned" + assert np.array_equal(res, var_list_ndarray), "Return not equal to input" + + # Check if not list or string, TypeError is raised + with pytest.raises(TypeError): + checks.check_variables(123) + + +def test_check_groups() -> None: + """Test check_groups() + + Similar logic to check_variables + """ + + group_list = ["NS", "HS", "MS"] + + # Check if None, None is returned + res = checks.check_groups(None) + assert res is None, "None is not returned" + + # Check if string, string is returned + res = checks.check_groups(group_list[0]) + assert res == [group_list[0]], "String is not returned" + + # Check if list, list is returned + res = checks.check_groups(group_list) + assert isinstance(res, np.ndarray), "Array is not returned" + + for group in group_list: + assert group in res, f"Group '{group}' is not returned" + + # Check if numpy array, list is returned + group_list_ndarray = np.array(group_list) + res = checks.check_groups(group_list_ndarray) + assert isinstance(res, np.ndarray), "numpy array is not returned" + assert np.array_equal(res, group_list_ndarray), "Return not equal to input" + + # Check if not list or string, TypeError is raised + with pytest.raises(TypeError): + checks.check_groups(123) + + +def test_check_version( + versions: List[int], +) -> None: + """Test check_version() + + Possible versions are integers of 4-7 + """ + + # Check if None, None is returned + with pytest.raises(ValueError): + res = checks.check_version(None) + + # Check if string, exception is raised + with pytest.raises(ValueError): + checks.check_version("6A") + + # Check if outside range + with pytest.raises(ValueError): + checks.check_version(123) + + # Check available range should not raise exception + for version in versions: + res = checks.check_version(version) + assert res is None, f"Function returned {res} for version {version}, expected None" + + # Try versions outside of range + for version in list(range(0, 3)) + list(range(8, 10)): + with pytest.raises(ValueError): + checks.check_version(version) + + +def test_check_product( + product_types: List[str], +) -> None: + """Test check_product() + + Depends on available_products(), test ambiguous product names similar to + those that exist + """ + + # Test a product that does exist + for product_type in product_types: + for product in available_products(product_type=product_type): + res = checks.check_product(product, product_type=product_type) + assert res is None, f"Function returned {res} for product {product} expected None" + + # Test a product that isn't a string + for product_type in product_types: + for product in [("IMERG"), 123, None]: + with pytest.raises(ValueError): + checks.check_product(product, product_type=product_type) + + +def test_check_product_type( + product_types: List[str], +) -> None: + """Test check_product_type()""" + + # Test a product_type that does exist + for product_type in product_types: + res = checks.check_product_type(product_type) + assert res is None, ( + f"Function returned {res} for product_type {product_type}, " f"expected None" + ) + + # Test a product_type that doesn't exist + for product_type in ["IMERG", 123, None]: + with pytest.raises(ValueError): + checks.check_product_type(product_type) + + +def test_check_product_category( + product_categories: List[str], +) -> None: + """Test check_product_category()""" + + # Test types that aren't strings + for product_category in [123, None]: + with pytest.raises(ValueError): + checks.check_product_category(product_category) + + # Test a product_category that does exist + for product_category in product_categories: + res = checks.check_product_category(product_category) + assert res is None, ( + f"Function returned {res} for product_category {product_category}," f" expected None" + ) + + # Test a product_category that doesn't exist + for product_category in ["NOT", "A", "CATEGORY"]: + with pytest.raises(ValueError): + checks.check_product_category(product_category) + + +def test_check_product_level( + product_levels: List[str], +) -> None: + """Test check_product_level()""" + + # Test types that aren't strings + for product_level in [123, None]: + with pytest.raises(ValueError): + checks.check_product_level(product_level) + + # Test a product_level that does exist + for product_level in product_levels: + res = checks.check_product_level(product_level) + assert ( + res is None + ), f"Function returned {res} for product_level {product_level}, expected None" + + # Test a product_level that doesn't exist + for product_level in ["NOT", "A", "LEVEL"]: + with pytest.raises(ValueError): + checks.check_product_level(product_level) + + +def test_check_product_validity( + product_types: List[str], +) -> None: + """Test check_product_validity()""" + + # Test a product that does exist + for product_type in product_types: + for product in available_products(product_type=product_type): + res = checks.check_product_validity(product, product_type=product_type) + assert res is None, f"Function returned {res} for product {product}, expected None" + + # Test a product that doesn't exist + for product_type in product_types: + for product in [("IMERG"), 123, None]: + with pytest.raises(ValueError): + checks.check_product_validity(product, product_type=product_type) + # Test a None product type + with pytest.raises(ValueError): + checks.check_product_validity(product, product_type=None) + + +def test_check_time() -> None: + """Test that time is returned a datetime object from varying inputs""" + + # Test a string + res = checks.check_time("2014-12-31") + assert isinstance(res, datetime.datetime) + assert res == datetime.datetime(2014, 12, 31) + + # Test a string with hh/mm/ss + res = checks.check_time("2014-12-31 12:30:30") + assert isinstance(res, datetime.datetime) + assert res == datetime.datetime(2014, 12, 31, 12, 30, 30) + + # Test a string with T