diff --git a/.coveragerc b/.coveragerc index 9bfc174f..9327a038 100644 --- a/.coveragerc +++ b/.coveragerc @@ -7,7 +7,9 @@ omit = gpm/cli/* gpm/encoding/* gpm/etc/* - gpm/retrieval/* + gpm/retrievals/* + gpm/visualization/animation.py + gpm/utils/pyresample.py gpm/_version.py [report] diff --git a/gpm/bucket/__init__.py b/gpm/bucket/__init__.py index ff7466e3..a0f7148f 100644 --- a/gpm/bucket/__init__.py +++ b/gpm/bucket/__init__.py @@ -27,9 +27,13 @@ """This directory defines the GPM-API geographic binning toolbox.""" from gpm.bucket.partitioning import LonLatPartitioning, TilePartitioning from gpm.bucket.readers import read_bucket as read +from gpm.bucket.routines import merge_granule_buckets, write_bucket, write_granules_bucket __all__ = [ "LonLatPartitioning", "TilePartitioning", "read", + "merge_granule_buckets", + "write_granules_bucket", + "write_bucket", ] diff --git a/gpm/bucket/routines.py b/gpm/bucket/routines.py index 15e7a89c..aa2568bf 100644 --- a/gpm/bucket/routines.py +++ b/gpm/bucket/routines.py @@ -301,7 +301,7 @@ def merge_granule_buckets( src_bucket_dir, dst_bucket_dir, row_group_size="400MB", - max_file_size="2GB", + max_file_size="1GB", compression="snappy", compression_level=None, write_metadata=False, diff --git a/gpm/tests/test_bucket/test_io.py b/gpm/tests/test_bucket/test_io.py index 254b6f6c..f119c118 100644 --- a/gpm/tests/test_bucket/test_io.py +++ b/gpm/tests/test_bucket/test_io.py @@ -173,7 +173,8 @@ def test_get_subdirectories(tmp_path): results = get_subdirectories(base_dir=bucket_dir, path=False) assert results == ["lon_bin=-5.0"] results = get_subdirectories(base_dir=os.path.join(bucket_dir, "lon_bin=-5.0"), path=False) - assert results == ["lat_bin=5.0", "lat_bin=-5.0"] + expected_results = ["lat_bin=5.0", "lat_bin=-5.0"] + assert sorted(results) == sorted(expected_results) def test_get_partitions_paths(tmp_path): @@ -182,10 +183,11 @@ def test_get_partitions_paths(tmp_path): create_test_bucket(bucket_dir=bucket_dir) # Test results results = get_partitions_paths(bucket_dir=bucket_dir) - assert results == [ + expected_results = [ os.path.join(bucket_dir, "lon_bin=-5.0", "lat_bin=-5.0"), os.path.join(bucket_dir, "lon_bin=-5.0", "lat_bin=5.0"), ] + assert sorted(results) == sorted(expected_results) def test_search_leaf_files_in_parallel(tmp_path): diff --git a/gpm/tests/test_bucket/test_readers.py b/gpm/tests/test_bucket/test_readers.py index 8cf03468..292dacff 100644 --- a/gpm/tests/test_bucket/test_readers.py +++ b/gpm/tests/test_bucket/test_readers.py @@ -97,6 +97,8 @@ def test_read_full_bucket(self, tmp_path): # Test read full database df_pl = read_bucket(bucket_dir) + assert df_pl.columns == [] # TODO WINDOW BUG + assert isinstance(df_pl, pl.DataFrame) assert df_pl.shape == (150, NUM_COLUMNS) def test_rows_columns_subsets(self, tmp_path): @@ -106,10 +108,12 @@ def test_rows_columns_subsets(self, tmp_path): # Test read row subset df_pl = read_bucket(bucket_dir, n_rows=2) + assert isinstance(df_pl, pl.DataFrame) assert df_pl.shape == (2, NUM_COLUMNS) # Test read row, columns subset df_pl = read_bucket(bucket_dir, n_rows=3, columns=["lon", "lat"]) + assert df_pl.columns == [] # TODO WINDOW BUG assert df_pl.shape == (3, 2) assert "lon" in df_pl assert "lat" in df_pl