Skip to content

Commit

Permalink
Added functionality to skip already-processed files
Browse files: browse the repository at this point in the history
  • Loading branch information
annajungbluth committed Sep 26, 2024
1 parent 367e493 commit 95ef8f3
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 21 deletions.
13 changes: 7 additions & 6 deletions rs_tools/_src/geoprocessing/goes/geoprocessor_goes16.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def preprocess_cloud_mask(self, files: List[str]) -> xr.Dataset:

return ds

def preprocess_files(self):
def preprocess_files(self, skip_if_exists: bool = True):
"""
Preprocesses multiple files in read path and saves processed files to save path.
"""
Expand All @@ -297,11 +297,10 @@ def preprocess_files(self):

for itime in pbar_time:

# TODO: Make it modular whether to overwrite or not
# skip if file already exists
itime_name = format_goes_dates(itime)
save_filename = Path(self.save_path).joinpath(f"{itime_name}_goes16.nc")
if os.path.exists(save_filename):
# skip if file already exists
if skip_if_exists and os.path.exists(save_filename):
logger.info(f"File already exists. Skipping: {save_filename}")
continue

Expand Down Expand Up @@ -350,6 +349,7 @@ def geoprocess(
save_path: str = "./",
region: str = None,
resample_method: str = "bilinear",
skip_if_exists: bool = True
):
"""
Geoprocesses GOES 16 files
Expand All @@ -360,7 +360,8 @@ def geoprocess(
save_path (str, optional): The path to save the geoprocessed files to. Defaults to "./".
region (str, optional): The geographic region to extract ("lon_min, lat_min, lon_max, lat_max"). Defaults to None.
resample_method (str, optional): The resampling method to use. Defaults to "bilinear".
skip_if_exists (bool, optional): Whether to skip if the file already exists. Defaults to True.
Returns:
None
"""
Expand All @@ -380,7 +381,7 @@ def geoprocess(
resample_method=resample_method
)
logger.info(f"GeoProcessing Files...")
goes16_geoprocessor.preprocess_files()
goes16_geoprocessor.preprocess_files(skip_if_exists=skip_if_exists)

logger.info(f"Finished GOES 16 GeoProcessing Script...!")

Expand Down
22 changes: 12 additions & 10 deletions rs_tools/_src/geoprocessing/modis/geoprocessor_modis.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def preprocess_cloud_mask(self, files: List[str]) -> xr.Dataset:
return ds


def preprocess_files(self):
def preprocess_files(self, skip_if_exists: bool = True):
"""
Preprocesses multiple files in read path and saves processed files to save path.
"""
Expand All @@ -224,6 +224,13 @@ def preprocess_files(self):

for itime in pbar_time:

itime_name = format_modis_dates(itime)
save_filename = Path(self.save_path).joinpath(f"{itime_name}_{self.satellite}.nc")
# skip if file already exists
if skip_if_exists and os.path.exists(save_filename):
logger.info(f"File already exists. Skipping: {save_filename}")
continue

pbar_time.set_description(f"Processing: {itime}")

# get files from unique times
Expand Down Expand Up @@ -261,21 +268,15 @@ def preprocess_files(self):
# check if save path exists, and create if not
if not os.path.exists(self.save_path):
os.makedirs(self.save_path)

# remove file if it already exists
itime_name = format_modis_dates(itime)
save_filename = Path(self.save_path).joinpath(f"{itime_name}_{self.satellite}.nc")
if os.path.exists(save_filename):
logger.info(f"File already exists. Overwriting file: {save_filename}")
os.remove(save_filename)

# save to netcdf
ds.to_netcdf(save_filename, engine="netcdf4")

def geoprocess(
satellite: str,
read_path: str = "./",
save_path: str = "./"
save_path: str = "./",
skip_if_exists: bool = True
):
"""
Geoprocesses MODIS files
Expand All @@ -284,6 +285,7 @@ def geoprocess(
satellite (str, optional): The satellite of the data to geoprocess.
read_path (str, optional): The path to read the files from. Defaults to "./".
save_path (str, optional): The path to save the geoprocessed files to. Defaults to "./".
skip_if_exists (bool, optional): Whether to skip if the file already exists. Defaults to True.
Returns:
None
Expand All @@ -297,7 +299,7 @@ def geoprocess(
save_path=save_path
)
logger.info(f"GeoProcessing Files...")
modis_geoprocessor.preprocess_files()
modis_geoprocessor.preprocess_files(skip_if_exists=skip_if_exists)

logger.info(f"Finished {satellite.upper()} GeoProcessing Script...!")

Expand Down
11 changes: 6 additions & 5 deletions rs_tools/_src/geoprocessing/msg/geoprocessor_msg.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def preprocess_cloud_mask(self, files: List[str]) -> xr.Dataset:

return ds

def preprocess_files(self):
def preprocess_files(self, skip_if_exists: bool = True):
"""
Preprocesses multiple files in read path and saves processed files to save path.
"""
Expand All @@ -290,10 +290,9 @@ def preprocess_files(self):

for itime in pbar_time:

# TODO: Make it modular whether to overwrite or not
# skip if file already exists
save_filename = Path(self.save_path).joinpath(f"{itime}_msg.nc")
if os.path.exists(save_filename):
if skip_if_exists and os.path.exists(save_filename):
# skip if file already exists
logger.info(f"File already exists. Skipping: {save_filename}")
continue

Expand Down Expand Up @@ -340,6 +339,7 @@ def geoprocess(
save_path: str = "./",
region: str = None,
resample_method: str = "bilinear",
skip_existing: bool = True
):
"""
Geoprocesses MSG files
Expand All @@ -350,6 +350,7 @@ def geoprocess(
save_path (str, optional): The path to save the geoprocessed files to. Defaults to "./".
region (str, optional): The geographic region to extract ("lon_min, lat_min, lon_max, lat_max"). Defaults to None.
resample_method (str, optional): The resampling method to use. Defaults to "bilinear".
skip_existing (bool, optional): Whether to skip existing files. Defaults to True.
Returns:
None
Expand All @@ -368,7 +369,7 @@ def geoprocess(
resample_method=resample_method
)
logger.info(f"GeoProcessing Files...")
msg_geoprocessor.preprocess_files()
msg_geoprocessor.preprocess_files(skip_if_exists=skip_if_exists)

logger.info(f"Finished MSG GeoProcessing Script...!")

Expand Down

0 comments on commit 95ef8f3

Please sign in to comment.