From a16b93d870a0dfb3529e485e50371d2f9d9a8362 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Wed, 6 Nov 2024 16:23:33 -0500
Subject: [PATCH] Update datasets requirement from <2.20,>=2.19 to
 >=2.20.0,<2.21 (#1330)

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Daniel King <43149077+dakinggg@users.noreply.github.com>
Co-authored-by: Milo Cress <iamroot@databricks.com>
Co-authored-by: Milo Cress <milo.cress@databricks.com>
---
 setup.py                                        | 2 +-
 tests/eval/test_in_context_learning_datasets.py | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 72543b24c8..ac03fa2a41 100644
--- a/setup.py
+++ b/setup.py
@@ -58,7 +58,7 @@
     'transformers>=4.43.2,<4.47',
     'mosaicml-streaming>=0.9.0,<0.10',
     'torch>=2.4.0,<2.4.1',
-    'datasets>=2.19,<2.20',
+    'datasets>=2.20.0,<2.21',
     'fsspec==2023.6.0',  # newer version results in a bug in datasets that duplicates data
     'sentencepiece==0.2.0',
     'einops==0.8.0',
diff --git a/tests/eval/test_in_context_learning_datasets.py b/tests/eval/test_in_context_learning_datasets.py
index 5fe9643cde..7509e95304 100644
--- a/tests/eval/test_in_context_learning_datasets.py
+++ b/tests/eval/test_in_context_learning_datasets.py
@@ -279,6 +279,7 @@ def test_update_generation_kwargs(
     hf_loading_vars = {
         'split': 'test',
         'name': 'invoker',
+        'trust_remote_code': True,
     }
     hf_parsing_map = {'context': ['quas', 'wex', 'exort'], 'answer': ['spell']}
     gen_kwargs = {'test_arg1': 1, 'test_arg2': 2}
@@ -370,6 +371,7 @@ def test_update_generation_kwargs_no_kwargs(
     hf_loading_vars = {
         'split': 'test',
         'name': 'invoker',
+        'trust_remote_code': True,
     }
     hf_parsing_map = {'context': ['quas', 'wex', 'exort'], 'answer': ['spell']}
 
@@ -459,6 +461,7 @@ def test_construct_context(
     hf_loading_vars = {
         'split': 'test',
         'name': 'invoker',
+        'trust_remote_code': True,
     }
     hf_parsing_map = {'context': ['quas', 'wex', 'exort'], 'answer': ['spell']}
 
@@ -513,6 +516,7 @@ def test_get_answer_from_example(
     hf_loading_vars = {
         'split': 'test',
         'name': 'invoker',
+        'trust_remote_code': True,
     }
     hf_parsing_map = {'context': ['quas', 'wex', 'exort'], 'answer': ['spell']}
 
@@ -552,6 +556,7 @@ def test_fix_eos_on_preamble(tmp_path: Path):
     hf_loading_vars = {
         'split': 'test',
         'name': 'invoker',
+        'trust_remote_code': True,
     }
     hf_parsing_map = {'context': ['quas', 'wex', 'exort'], 'answer': ['spell']}
 
@@ -592,6 +597,7 @@ def test_tokenize_example_with_tokenize_labels(
     hf_loading_vars = {
         'split': 'test',
         'name': 'invoker',
+        'trust_remote_code': True,
     }
     hf_parsing_map = {'context': ['quas', 'wex', 'exort'], 'answer': ['spell']}
 
@@ -658,6 +664,7 @@ def test_tokenize_example_with_no_tokenize_labels(
     hf_loading_vars = {
         'split': 'test',
         'name': 'invoker',
+        'trust_remote_code': True,
     }
     hf_parsing_map = {'context': ['quas', 'wex', 'exort'], 'answer': ['spell']}
 
@@ -2429,6 +2436,7 @@ def test_lm_spacing_dataloader(
     [{
         'split': 'test',
         'name': 'juggernaut',
+        'trust_remote_code': True,
     }],
 )
 @pytest.mark.parametrize(
@@ -2508,6 +2516,7 @@ def test_hf_dataloading_lm_dataloader(
     [{
         'split': 'test',
         'name': 'invoker',
+        'trust_remote_code': True,
     }],
 )
 @pytest.mark.parametrize(