diff --git a/pyproject.toml b/pyproject.toml index 586921a8..697ccbec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ packages = ["tsfm_public", "tsfm_public.toolkit", "tsfm_public.models", "tsfm_pu notebooks = ["jupyter", "matplotlib", "datasets", "ipywidgets", "plotly", "kaleido", "tensorboard"] testing = ["pytest", "tsfm_public[notebooks]", "parameterized"] -dev = ["pre-commit", "tsfm_public[testing]", "ruff==0.1.5"] +dev = ["pre-commit", "tsfm_public[testing]", "ruff==0.4.4"] evaluation = [ "tsevaluate @ git+ssh://git@github.ibm.com/srom/tsevaluate.git", ] @@ -54,15 +54,16 @@ version_file = "tsfm_public/_version.py" [tool.ruff] # Never enforce `E501` (line length violations). -ignore = ["C901", "E501", "E741", "F402", "F823" ] -select = ["C", "E", "F", "I", "W"] +lint.ignore = ["C901", "E501", "E741", "F402", "F823" ] +lint.select = ["C", "E", "F", "I", "W"] line-length = 119 +extend-exclude = ["tsfm_public/_version.py"] # Ignore import violations in all `__init__.py` files. -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "__init__.py" = ["E402", "F401", "F403", "F811"] -[tool.ruff.isort] +[tool.ruff.lint.isort] lines-after-imports = 2 known-first-party = ["tsfm_public"] diff --git a/tests/models/tinytimemixer/test_modeling_tinytimemixer.py b/tests/models/tinytimemixer/test_modeling_tinytimemixer.py index 9897f4d1..3ce871c4 100644 --- a/tests/models/tinytimemixer/test_modeling_tinytimemixer.py +++ b/tests/models/tinytimemixer/test_modeling_tinytimemixer.py @@ -2,7 +2,7 @@ # # This code is based on the test code for PatchTSMixer in the HuggingFace Transformers Library: # https://github.com/huggingface/transformers/blob/main/tests/models/patchtsmixer/test_modeling_patchtsmixer.py -""" Testing suite for the PyTorch TinyTimeMixer model. 
""" +"""Testing suite for the PyTorch TinyTimeMixer model.""" # Standard import itertools diff --git a/tests/toolkit/test_dataset.py b/tests/toolkit/test_dataset.py index 18f27b68..6e23adad 100644 --- a/tests/toolkit/test_dataset.py +++ b/tests/toolkit/test_dataset.py @@ -3,7 +3,6 @@ """Tests basic dataset functions""" - from datetime import datetime, timedelta import numpy as np diff --git a/tests/toolkit/test_time_series_forecasting_pipeline.py b/tests/toolkit/test_time_series_forecasting_pipeline.py index defdd123..503e5c46 100644 --- a/tests/toolkit/test_time_series_forecasting_pipeline.py +++ b/tests/toolkit/test_time_series_forecasting_pipeline.py @@ -2,6 +2,7 @@ # """Tests the time series preprocessor and functions""" + import pandas as pd from transformers import PatchTSTForPrediction diff --git a/tests/toolkit/test_time_series_preprocessor.py b/tests/toolkit/test_time_series_preprocessor.py index bdb9c93e..8a52c7c9 100644 --- a/tests/toolkit/test_time_series_preprocessor.py +++ b/tests/toolkit/test_time_series_preprocessor.py @@ -429,6 +429,14 @@ def test_get_datasets_with_frequency_token(ts_data): assert train[0]["freq_token"] == DEFAULT_FREQUENCY_MAPPING["d"] +def test_get_frequency_token(): + tsp = TimeSeriesPreprocessor(timestamp_column="date") + + assert tsp.get_frequency_token("1h") == DEFAULT_FREQUENCY_MAPPING["h"] + assert tsp.get_frequency_token("h") == DEFAULT_FREQUENCY_MAPPING["h"] + assert tsp.get_frequency_token("0 days 01:00:00") == DEFAULT_FREQUENCY_MAPPING["h"] + + def test_id_columns_and_scaling_id_columns(ts_data_runs): df = ts_data_runs diff --git a/tsfm_public/models/tinytimemixer/configuration_tinytimemixer.py b/tsfm_public/models/tinytimemixer/configuration_tinytimemixer.py index b4aafa7f..0e7df2d0 100644 --- a/tsfm_public/models/tinytimemixer/configuration_tinytimemixer.py +++ b/tsfm_public/models/tinytimemixer/configuration_tinytimemixer.py @@ -1,6 +1,6 @@ # Copyright contributors to the TSFM project # -""" TinyTimeMixer model 
configuration""" +"""TinyTimeMixer model configuration""" from typing import Optional, Union diff --git a/tsfm_public/models/tinytimemixer/modeling_tinytimemixer.py b/tsfm_public/models/tinytimemixer/modeling_tinytimemixer.py index 504dfdd8..eb64518d 100644 --- a/tsfm_public/models/tinytimemixer/modeling_tinytimemixer.py +++ b/tsfm_public/models/tinytimemixer/modeling_tinytimemixer.py @@ -2,7 +2,7 @@ # # This code is based on layers and components from the PatchTSMixer model in the HuggingFace Transformers # Library: https://github.com/huggingface/transformers/blob/main/src/transformers/models/patchtsmixer/modeling_patchtsmixer.py -""" PyTorch TinyTimeMixer model.""" +"""PyTorch TinyTimeMixer model.""" # Standard import copy diff --git a/tsfm_public/models/tinytimemixer/utils/ttm_utils.py b/tsfm_public/models/tinytimemixer/utils/ttm_utils.py index 03bd457e..dbbb5d68 100644 --- a/tsfm_public/models/tinytimemixer/utils/ttm_utils.py +++ b/tsfm_public/models/tinytimemixer/utils/ttm_utils.py @@ -1,4 +1,5 @@ """Utilities for TTM notebooks""" + # Standard import argparse import os diff --git a/tsfm_public/toolkit/time_series_preprocessor.py b/tsfm_public/toolkit/time_series_preprocessor.py index d55a08af..f32258cc 100644 --- a/tsfm_public/toolkit/time_series_preprocessor.py +++ b/tsfm_public/toolkit/time_series_preprocessor.py @@ -14,6 +14,7 @@ import pandas as pd from datasets import Dataset from deprecated import deprecated +from pandas.tseries.frequencies import to_offset from sklearn.preprocessing import MinMaxScaler as MinMaxScaler_ from sklearn.preprocessing import OrdinalEncoder as OrdinalEncoder_ from sklearn.preprocessing import StandardScaler as StandardScaler_ @@ -317,44 +318,6 @@ def from_dict(cls, feature_extractor_dict: Dict[str, Any], **kwargs) -> "PreTrai return super().from_dict(feature_extractor_dict, **kwargs) - # def _prepare_single_time_series(self, name, d): - # """ - # Segment and prepare the time series based on the configuration arguments. 
- - # name: name for the time series, for example as a result of a grouping operation - # d: the data for a single time series - # """ - # for s_begin in range(d.shape[0] - self.context_length + 1): - # s_end = s_begin + self.context_length - # seq_x = d[self.input_columns].iloc[s_begin:s_end].values - - # if self.time_series_task == TimeSeriesTask.FORECASTING: - # seq_y = ( - # d[self.output_columns] - # .iloc[s_end : s_end + self.prediction_length] - # .values - # ) - # else: - # seq_y = None - # # to do: add handling of other types - - # if self.timestamp_column: - # ts = d[self.timestamp_column].iloc[s_end - 1] - # else: - # ts = None - - # if self.id_columns: - # ids = d[self.id_columns].iloc[s_end - 1].values - # else: - # ids = None - - # yield { - # "timestamp_column": ts, - # "id_columns": ids, - # "past_values": seq_x, - # "future_values": seq_y, - # } - @classmethod def _get_scaler_class(cls, scaler_type): if scaler_type == ScalerType.MINMAX.value: @@ -483,10 +446,25 @@ def _train_categorical_encoder(self, df: pd.DataFrame): def get_frequency_token(self, token_name: str): token = self.frequency_mapping.get(token_name, None) + if token is not None: + return token - if token is None: - warn(f"Frequency token {token_name} was not found in the frequncy token mapping.") - token = self.frequency_mapping["oov"] + # try to map as a frequency string + try: + token_name_offs = to_offset(token_name).freqstr + token = self.frequency_mapping.get(token_name_offs, None) + if token is not None: + return token + except ValueError: + # lastly try to map the timedelta to a frequency string + token_name_td = pd._libs.tslibs.timedeltas.Timedelta(token_name) + token_name_offs = to_offset(token_name_td).freqstr + token = self.frequency_mapping.get(token_name_offs, None) + if token is not None: + return token + + warn(f"Frequency token {token_name} was not found in the frequency token mapping.") + token = self.frequency_mapping["oov"] return token