diff --git a/open_lm/data.py b/open_lm/data.py index 107ff6e0..b3902bfa 100644 --- a/open_lm/data.py +++ b/open_lm/data.py @@ -163,6 +163,8 @@ def group_by_keys_nothrow(data, keys=base_plus_ext, lcase=True, suffixes=None, h current_sample = None for filesample in data: assert isinstance(filesample, dict) + if "fname" not in filesample or "data" not in filesample: + continue fname, value = filesample["fname"], filesample["data"] prefix, suffix = keys(fname) if prefix is None: