From f73f5e62e2383c1cb6975fca70082d6dc51ec6f2 Mon Sep 17 00:00:00 2001
From: Yih-Dar <2521628+ydshieh@users.noreply.github.com>
Date: Fri, 25 Oct 2024 17:14:07 +0200
Subject: [PATCH 1/3] Avoid check expected exception when it is on CUDA
 (#34408)

* update

* update

---------

Co-authored-by: ydshieh <ydshieh@users.noreply.github.com>
---
 .../pipelines/test_pipelines_summarization.py  |  5 +++--
 .../test_pipelines_text_generation.py          | 18 ++++++++++--------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/tests/pipelines/test_pipelines_summarization.py b/tests/pipelines/test_pipelines_summarization.py
index 465dba9743c648..613b9dca8e1a71 100644
--- a/tests/pipelines/test_pipelines_summarization.py
+++ b/tests/pipelines/test_pipelines_summarization.py
@@ -85,8 +85,9 @@ def run_pipeline_test(self, summarizer, _):
                 and len(summarizer.model.trainable_weights) > 0
                 and "GPU" in summarizer.model.trainable_weights[0].device
             ):
-                with self.assertRaises(Exception):
-                    outputs = summarizer("This " * 1000)
+                if str(summarizer.device) == "cpu":
+                    with self.assertRaises(Exception):
+                        outputs = summarizer("This " * 1000)
         outputs = summarizer("This " * 1000, truncation=TruncationStrategy.ONLY_FIRST)
 
     @require_torch
diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py
index 277c870b4d1074..51f3cae5e31235 100644
--- a/tests/pipelines/test_pipelines_text_generation.py
+++ b/tests/pipelines/test_pipelines_text_generation.py
@@ -493,17 +493,19 @@ def run_pipeline_test(self, text_generator, _):
             and text_generator.model.__class__.__name__ not in EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS
         ):
             # Handling of large generations
-            with self.assertRaises((RuntimeError, IndexError, ValueError, AssertionError)):
-                text_generator("This is a test" * 500, max_new_tokens=20)
+            if str(text_generator.device) == "cpu":
+                with self.assertRaises((RuntimeError, IndexError, ValueError, AssertionError)):
+                    text_generator("This is a test" * 500, max_new_tokens=20)
 
             outputs = text_generator("This is a test" * 500, handle_long_generation="hole", max_new_tokens=20)
             # Hole strategy cannot work
-            with self.assertRaises(ValueError):
-                text_generator(
-                    "This is a test" * 500,
-                    handle_long_generation="hole",
-                    max_new_tokens=tokenizer.model_max_length + 10,
-                )
+            if str(text_generator.device) == "cpu":
+                with self.assertRaises(ValueError):
+                    text_generator(
+                        "This is a test" * 500,
+                        handle_long_generation="hole",
+                        max_new_tokens=tokenizer.model_max_length + 10,
+                    )
 
     @require_torch
     @require_accelerate

From 6a62a6d1b54123ede3a1e3bda57c924c64e78124 Mon Sep 17 00:00:00 2001
From: Rudy Delouya <rudy.delouya@gmail.com>
Date: Fri, 25 Oct 2024 17:52:29 +0200
Subject: [PATCH 2/3] Fix typos in agents_advanced.md (#34405)

---
 docs/source/en/agents_advanced.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/source/en/agents_advanced.md b/docs/source/en/agents_advanced.md
index 2327357525d8d9..ddcc619b4f91f6 100644
--- a/docs/source/en/agents_advanced.md
+++ b/docs/source/en/agents_advanced.md
@@ -66,10 +66,10 @@ manager_agent.run("Who is the CEO of Hugging Face?")
 
 Let's take again the tool example from main documentation, for which we had implemented a `tool` decorator.
 
-If you need to add variation, like custom attributes for your too, you can build your tool following the fine-grained method: building a class that inherits from the [`Tool`] superclass.
+If you need to add variation, like custom attributes for your tool, you can build your tool following the fine-grained method: building a class that inherits from the [`Tool`] superclass.
 
 The custom tool needs:
-- An attribute `name`, which corresponds to the name of the tool itself. The name usually describes what the tool does. Since the code returns the model with the most downloads for a task, let's name is `model_download_counter`.
+- An attribute `name`, which corresponds to the name of the tool itself. The name usually describes what the tool does. Since the code returns the model with the most downloads for a task, let's name it `model_download_counter`.
 - An attribute `description` is used to populate the agent's system prompt.
 - An `inputs` attribute, which is a dictionary with keys `"type"` and `"description"`. It contains information that helps the Python interpreter make educated choices about the input.
 - An `output_type` attribute, which specifies the output type.
@@ -240,4 +240,4 @@ with gr.Blocks() as demo:
 
 if __name__ == "__main__":
     demo.launch()
-```
\ No newline at end of file
+```

From 1d063793318b20654ebb850f48f43e0a247ab7bb Mon Sep 17 00:00:00 2001
From: Steven Liu <59462357+stevhliu@users.noreply.github.com>
Date: Fri, 25 Oct 2024 08:52:45 -0700
Subject: [PATCH 3/3] [docs] Cache implementations (#34325)

cache
---
 src/transformers/generation/configuration_utils.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/transformers/generation/configuration_utils.py b/src/transformers/generation/configuration_utils.py
index 3c204481b04296..9b543f6c35711d 100644
--- a/src/transformers/generation/configuration_utils.py
+++ b/src/transformers/generation/configuration_utils.py
@@ -172,7 +172,15 @@ class GenerationConfig(PushToHubMixin):
             speed up decoding.
         cache_implementation (`str`, *optional*, default to `None`):
             Name of the cache class that will be instantiated in `generate`, for faster decoding. Possible values are:
-            {ALL_CACHE_IMPLEMENTATIONS}. We support other cache types, but they must be manually instantiated and
+
+            - `"static"`: [`StaticCache`]
+            - `"offloaded_static"`: [`OffloadedStaticCache`]
+            - `"sliding_window"`: [`SlidingWindowCache`]
+            - `"hybrid"`: [`HybridCache`]
+            - `"mamba"`: [`MambaCache`]
+            - `"quantized"`: [`QuantizedCache`]
+
+            We support other cache types, but they must be manually instantiated and
             passed to `generate` through the `past_key_values` argument. See our
             [cache documentation](https://huggingface.co/docs/transformers/en/kv_cache) for further information.
         cache_config (`CacheConfig` or `dict`, *optional*, default to `None`):