diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile index 930fdfb799cd33..b888397f95f176 100644 --- a/docker/transformers-all-latest-gpu/Dockerfile +++ b/docker/transformers-all-latest-gpu/Dockerfile @@ -45,9 +45,6 @@ RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/opt # For video model testing RUN python3 -m pip install --no-cache-dir decord av==9.2.0 -# For GGUF tests -RUN python3 -m pip install --no-cache-dir gguf - # Some slow tests require bnb RUN python3 -m pip install --no-cache-dir bitsandbytes diff --git a/docker/transformers-quantization-latest-gpu/Dockerfile b/docker/transformers-quantization-latest-gpu/Dockerfile index 2b74dca91f30bc..6d94dbee5aa0e9 100755 --- a/docker/transformers-quantization-latest-gpu/Dockerfile +++ b/docker/transformers-quantization-latest-gpu/Dockerfile @@ -48,6 +48,9 @@ RUN python3 -m pip install --no-cache-dir aqlm[gpu]==1.0.2 # Add hqq for quantization testing RUN python3 -m pip install --no-cache-dir hqq +# For GGUF tests +RUN python3 -m pip install --no-cache-dir gguf + # Add autoawq for quantization testing # >=v0.2.3 needed for compatibility with torch 2.2.1 RUN python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.3/autoawq-0.2.3+cu118-cp38-cp38-linux_x86_64.whl diff --git a/tests/quantization/quanto_integration/test_quanto.py b/tests/quantization/quanto_integration/test_quanto.py index f574478241979d..e662300a4669d3 100644 --- a/tests/quantization/quanto_integration/test_quanto.py +++ b/tests/quantization/quanto_integration/test_quanto.py @@ -440,6 +440,7 @@ def test_quantize_activation(self): self.assertIn("We don't support quantizing the activations with transformers library", str(e.exception)) +@require_quanto @require_torch_gpu class QuantoKVCacheQuantizationTest(unittest.TestCase): @slow @@ -447,7 +448,7 @@ class QuantoKVCacheQuantizationTest(unittest.TestCase): def test_quantized_cache(self): EXPECTED_TEXT_COMPLETION = [ "Simply put, the theory of relativity states that 1) the speed of light is the same for all observers, and 2) the laws of physics are the same for all observers.\nThe first part of the theory of relativity", - "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, my fries, my burgers, my hot dogs, my sandwiches, my chicken, my pizza, my sal", + "My favorite all time favorite condiment is ketchup. I love it on everything. I love it on my eggs, my fries, my burgers, my hot dogs, my sandwiches, my salads, my chicken, my fish", ] prompts = [