From f73f5e62e2383c1cb6975fca70082d6dc51ec6f2 Mon Sep 17 00:00:00 2001 From: Yih-Dar <2521628+ydshieh@users.noreply.github.com> Date: Fri, 25 Oct 2024 17:14:07 +0200 Subject: [PATCH 1/3] Avoid check expected exception when it is on CUDA (#34408) * update * update --------- Co-authored-by: ydshieh --- .../pipelines/test_pipelines_summarization.py | 5 +++-- .../test_pipelines_text_generation.py | 18 ++++++++++-------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/pipelines/test_pipelines_summarization.py b/tests/pipelines/test_pipelines_summarization.py index 465dba9743c648..613b9dca8e1a71 100644 --- a/tests/pipelines/test_pipelines_summarization.py +++ b/tests/pipelines/test_pipelines_summarization.py @@ -85,8 +85,9 @@ def run_pipeline_test(self, summarizer, _): and len(summarizer.model.trainable_weights) > 0 and "GPU" in summarizer.model.trainable_weights[0].device ): - with self.assertRaises(Exception): - outputs = summarizer("This " * 1000) + if str(summarizer.device) == "cpu": + with self.assertRaises(Exception): + outputs = summarizer("This " * 1000) outputs = summarizer("This " * 1000, truncation=TruncationStrategy.ONLY_FIRST) @require_torch diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py index 277c870b4d1074..51f3cae5e31235 100644 --- a/tests/pipelines/test_pipelines_text_generation.py +++ b/tests/pipelines/test_pipelines_text_generation.py @@ -493,17 +493,19 @@ def run_pipeline_test(self, text_generator, _): and text_generator.model.__class__.__name__ not in EXTRA_MODELS_CAN_HANDLE_LONG_INPUTS ): # Handling of large generations - with self.assertRaises((RuntimeError, IndexError, ValueError, AssertionError)): - text_generator("This is a test" * 500, max_new_tokens=20) + if str(text_generator.device) == "cpu": + with self.assertRaises((RuntimeError, IndexError, ValueError, AssertionError)): + text_generator("This is a test" * 500, max_new_tokens=20) outputs = text_generator("This is a test" * 500, handle_long_generation="hole", max_new_tokens=20) # Hole strategy cannot work - with self.assertRaises(ValueError): - text_generator( - "This is a test" * 500, - handle_long_generation="hole", - max_new_tokens=tokenizer.model_max_length + 10, - ) + if str(text_generator.device) == "cpu": + with self.assertRaises(ValueError): + text_generator( + "This is a test" * 500, + handle_long_generation="hole", + max_new_tokens=tokenizer.model_max_length + 10, + ) @require_torch @require_accelerate From 6a62a6d1b54123ede3a1e3bda57c924c64e78124 Mon Sep 17 00:00:00 2001 From: Rudy Delouya Date: Fri, 25 Oct 2024 17:52:29 +0200 Subject: [PATCH 2/3] Fix typos in agents_advanced.md (#34405) --- docs/source/en/agents_advanced.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/source/en/agents_advanced.md b/docs/source/en/agents_advanced.md index 2327357525d8d9..ddcc619b4f91f6 100644 --- a/docs/source/en/agents_advanced.md +++ b/docs/source/en/agents_advanced.md @@ -66,10 +66,10 @@ manager_agent.run("Who is the CEO of Hugging Face?") Let's take again the tool example from main documentation, for which we had implemented a `tool` decorator. -If you need to add variation, like custom attributes for your too, you can build your tool following the fine-grained method: building a class that inherits from the [`Tool`] superclass. +If you need to add variation, like custom attributes for your tool, you can build your tool following the fine-grained method: building a class that inherits from the [`Tool`] superclass. The custom tool needs: -- An attribute `name`, which corresponds to the name of the tool itself. The name usually describes what the tool does. Since the code returns the model with the most downloads for a task, let's name is `model_download_counter`. +- An attribute `name`, which corresponds to the name of the tool itself. The name usually describes what the tool does. Since the code returns the model with the most downloads for a task, let's name it `model_download_counter`. - An attribute `description` is used to populate the agent's system prompt. - An `inputs` attribute, which is a dictionary with keys `"type"` and `"description"`. It contains information that helps the Python interpreter make educated choices about the input. - An `output_type` attribute, which specifies the output type. @@ -240,4 +240,4 @@ with gr.Blocks() as demo: if __name__ == "__main__": demo.launch() -``` \ No newline at end of file +``` From 1d063793318b20654ebb850f48f43e0a247ab7bb Mon Sep 17 00:00:00 2001 From: Steven Liu <59462357+stevhliu@users.noreply.github.com> Date: Fri, 25 Oct 2024 08:52:45 -0700 Subject: [PATCH 3/3] [docs] Cache implementations (#34325) cache --- src/transformers/generation/configuration_utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/transformers/generation/configuration_utils.py b/src/transformers/generation/configuration_utils.py index 3c204481b04296..9b543f6c35711d 100644 --- a/src/transformers/generation/configuration_utils.py +++ b/src/transformers/generation/configuration_utils.py @@ -172,7 +172,15 @@ class GenerationConfig(PushToHubMixin): speed up decoding. cache_implementation (`str`, *optional*, default to `None`): Name of the cache class that will be instantiated in `generate`, for faster decoding. Possible values are: - {ALL_CACHE_IMPLEMENTATIONS}. We support other cache types, but they must be manually instantiated and + + - `"static"`: [`StaticCache`] + - `"offloaded_static"`: [`OffloadedStaticCache`] + - `"sliding_window"`: [`SlidingWindowCache`] + - `"hybrid"`: [`HybridCache`] + - `"mamba"`: [`MambaCache`] + - `"quantized"`: [`QuantizedCache`] + + We support other cache types, but they must be manually instantiated and passed to `generate` through the `past_key_values` argument. See our [cache documentation](https://huggingface.co/docs/transformers/en/kv_cache) for further information. cache_config (`CacheConfig` or `dict`, *optional*, default to `None`):