diff --git a/README.md b/README.md
index 1d31d59..9930da8 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,2 @@
-# OCF Template Repository
-Template Repository for OCF Projects
-
-## Usage
-
-Do the following to customize the repo to the project:
-
-- Replace `src` with name of the library/project
-- Update `setup.py` with the proper info
-- Change `commit` to `True` in `.bumpversion.cfg` if you want the minor version
-  to increment on every commit.
-- Add PyPi access token to release to PyPi
-- Update name of folder in the test workflow
+# OCF Data Sampler
+A repo for sampling from weather data for renewable energy prediction
diff --git a/setup.py b/setup.py
index 0e0ec1e..c2fafa1 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,7 @@
     name="ocf_data_sampler",
     version="0.0.1",
     license="MIT",
-    description="Super cool OCF Repo",
+    description="Sample from weather data for renewable energy prediction",
     author="James Fulton, Peter Dudfield, and the Open Climate Fix team",
     author_email="info@openclimatefix.org",
     company="Open Climate Fix Ltd",
diff --git a/tests/select/test_find_contiguous_t0_time_periods.py b/tests/select/test_find_contiguous_t0_time_periods.py
new file mode 100644
index 0000000..83b9e59
--- /dev/null
+++ b/tests/select/test_find_contiguous_t0_time_periods.py
@@ -0,0 +1,195 @@
+from datetime import timedelta
+
+import pandas as pd
+
+from ocf_data_sampler.select.find_contiguous_t0_time_periods import (
+    find_contiguous_t0_periods_nwp,
+    find_contiguous_t0_time_periods,
+    intersection_of_multiple_dataframes_of_periods,
+)
+
+
+def test_find_contiguous_t0_time_periods():
+    """Check the t0 periods found in 5-minutely data with missing timestamps."""
+    # Create 5-minutely data timestamps with three of them removed
+    freq = timedelta(minutes=5)
+    history_duration = timedelta(minutes=60)
+    forecast_duration = timedelta(minutes=15)
+
+    datetimes = (
+        pd.date_range("2023-01-01 12:00", "2023-01-01 17:00", freq=freq)
+        .delete([5, 6, 30])
+    )
+
+    periods = find_contiguous_t0_time_periods(
+        datetimes=datetimes,
+        history_duration=history_duration,
+        forecast_duration=forecast_duration,
+        sample_period_duration=freq,
+    )
+
+    expected_results = pd.DataFrame(
+        {
+            "start_dt": pd.to_datetime(
+                [
+                    "2023-01-01 13:35",
+                    "2023-01-01 15:35",
+                ]
+            ),
+            "end_dt": pd.to_datetime(
+                [
+                    "2023-01-01 14:10",
+                    "2023-01-01 16:45",
+                ]
+            ),
+        },
+    )
+
+    assert periods.equals(expected_results)
+
+
+def test_find_contiguous_t0_time_periods_nwp():
+    """Check the NWP t0 periods for several history durations and stalenesses."""
+    # These are the expected results of the test
+    expected_results = [
+        pd.DataFrame(
+            {
+                "start_dt": pd.to_datetime(["2023-01-01 03:00", "2023-01-02 03:00"]),
+                "end_dt": pd.to_datetime(["2023-01-01 21:00", "2023-01-03 06:00"]),
+            },
+        ),
+        pd.DataFrame(
+            {
+                "start_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 05:00",
+                        "2023-01-02 05:00",
+                        "2023-01-02 14:00",
+                    ]
+                ),
+                "end_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 21:00",
+                        "2023-01-02 12:00",
+                        "2023-01-03 06:00",
+                    ]
+                ),
+            },
+        ),
+        pd.DataFrame(
+            {
+                "start_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 05:00",
+                        "2023-01-01 11:00",
+                        "2023-01-02 05:00",
+                        "2023-01-02 14:00",
+                    ]
+                ),
+                "end_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 09:00",
+                        "2023-01-01 18:00",
+                        "2023-01-02 09:00",
+                        "2023-01-03 03:00",
+                    ]
+                ),
+            },
+        ),
+        pd.DataFrame(
+            {
+                "start_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 05:00",
+                        "2023-01-01 11:00",
+                        "2023-01-01 14:00",
+                        "2023-01-02 05:00",
+                        "2023-01-02 14:00",
+                        "2023-01-02 17:00",
+                        "2023-01-02 20:00",
+                        "2023-01-02 23:00",
+                    ]
+                ),
+                "end_dt": pd.to_datetime(
+                    [
+                        "2023-01-01 06:00",
+                        "2023-01-01 12:00",
+                        "2023-01-01 15:00",
+                        "2023-01-02 06:00",
+                        "2023-01-02 15:00",
+                        "2023-01-02 18:00",
+                        "2023-01-02 21:00",
+                        "2023-01-03 00:00",
+                    ]
+                ),
+            },
+        ),
+    ]
+
+    # Create 3-hourly init times with a few time stamps missing
+    freq = timedelta(minutes=180)
+
+    datetimes = (
+        pd.date_range("2023-01-01 03:00", "2023-01-02 21:00", freq=freq)
+        .delete([1, 4, 5, 6, 7, 9, 10])
+    )
+
+    # Choose some history durations and max stalenesses
+    history_durations_hr = [0, 2, 2, 2]
+    max_stalenesses_hr = [9, 9, 6, 3]
+
+    # Each expected result pairs with the durations at the same position
+    for history_hr, staleness_hr, expected in zip(
+        history_durations_hr, max_stalenesses_hr, expected_results
+    ):
+        time_periods = find_contiguous_t0_periods_nwp(
+            datetimes=datetimes,
+            history_duration=timedelta(hours=history_hr),
+            max_staleness=timedelta(hours=staleness_hr),
+            max_dropout=timedelta(0),
+        )
+
+        # Check if results are as expected
+        assert time_periods.equals(expected)
+
+
+def test_intersection_of_multiple_dataframes_of_periods():
+    """Check the intersection of three sets of periods."""
+    periods_1 = pd.DataFrame(
+        {
+            "start_dt": pd.to_datetime(["2023-01-01 05:00", "2023-01-01 14:10"]),
+            "end_dt": pd.to_datetime(["2023-01-01 13:35", "2023-01-01 18:00"]),
+        },
+    )
+
+    periods_2 = pd.DataFrame(
+        {
+            "start_dt": pd.to_datetime(["2023-01-01 12:00"]),
+            "end_dt": pd.to_datetime(["2023-01-02 00:00"]),
+        },
+    )
+
+    periods_3 = pd.DataFrame(
+        {
+            "start_dt": pd.to_datetime(["2023-01-01 00:00", "2023-01-01 13:00"]),
+            "end_dt": pd.to_datetime(["2023-01-01 12:30", "2023-01-01 23:00"]),
+        },
+    )
+
+    expected_result = pd.DataFrame(
+        {
+            "start_dt": pd.to_datetime(
+                ["2023-01-01 12:00", "2023-01-01 13:00", "2023-01-01 14:10"]
+            ),
+            "end_dt": pd.to_datetime(
+                ["2023-01-01 12:30", "2023-01-01 13:35", "2023-01-01 18:00"]
+            ),
+        },
+    )
+
+    overlapping_periods = intersection_of_multiple_dataframes_of_periods(
+        [periods_1, periods_2, periods_3]
+    )
+
+    # Check if results are as expected
+    assert overlapping_periods.equals(expected_result)