From 2713b2113270716e172e6500e90f22416764d71c Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Mon, 27 Nov 2023 10:30:35 +0000 Subject: [PATCH 1/4] Let each model resolve their own default dtype. --- server/text_generation_server/models/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/text_generation_server/models/__init__.py b/server/text_generation_server/models/__init__.py index 5b1b5715c5e..ab3b25b7c89 100644 --- a/server/text_generation_server/models/__init__.py +++ b/server/text_generation_server/models/__init__.py @@ -87,7 +87,9 @@ def get_model( trust_remote_code: bool, ) -> Model: if dtype is None: - dtype = torch.float16 + # Keep it as default for now and let + # every model resolve their own default dtype. + dtype = None elif dtype == "float16": dtype = torch.float16 elif dtype == "bfloat16": From e3c31c9d922c33c61b13de123e7822e797284c95 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 28 Nov 2023 14:15:56 +0000 Subject: [PATCH 2/4] Allow dtype for bitsandbytes (it works, checked for idefics 9b/llama/80b) --- integration-tests/conftest.py | 4 ++++ integration-tests/models/test_idefics.py | 2 +- server/text_generation_server/cli.py | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index c1cbe7f3e96..8265a045a91 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -210,6 +210,7 @@ def local_launcher( quantize: Optional[str] = None, trust_remote_code: bool = False, use_flash_attention: bool = True, + dtype: Optional[str] = None ): port = random.randint(8000, 10_000) master_port = random.randint(10_000, 20_000) @@ -237,6 +238,9 @@ def local_launcher( if quantize is not None: args.append("--quantize") args.append(quantize) + if dtype is not None: + args.append("--dtype") + args.append(dtype) if trust_remote_code: args.append("--trust-remote-code") diff --git a/integration-tests/models/test_idefics.py 
b/integration-tests/models/test_idefics.py index 5f4571b57ed..5a81a4f0911 100644 --- a/integration-tests/models/test_idefics.py +++ b/integration-tests/models/test_idefics.py @@ -3,7 +3,7 @@ @pytest.fixture(scope="module") def idefics_handle(launcher): - with launcher("HuggingFaceM4/idefics-9b-instruct", num_shard=2) as handle: + with launcher("HuggingFaceM4/idefics-9b-instruct", num_shard=2, dtype="float16") as handle: yield handle diff --git a/server/text_generation_server/cli.py b/server/text_generation_server/cli.py index b741a84cfbd..3abe86afd23 100644 --- a/server/text_generation_server/cli.py +++ b/server/text_generation_server/cli.py @@ -76,7 +76,7 @@ def serve( # Downgrade enum into str for easier management later on quantize = None if quantize is None else quantize.value dtype = None if dtype is None else dtype.value - if dtype is not None and quantize is not None: + if dtype is not None and quantize not in {None, "bitsandbytes", "bitsandbytes-nf4", "bitsandbytes-fp4"}: raise RuntimeError( "Only 1 can be set between `dtype` and `quantize`, as they both decide how goes the final model." ) From 26a271fad50ef9d59e4e79a0c5004b47ac0cc993 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 28 Nov 2023 15:14:46 +0000 Subject: [PATCH 3/4] Adding the flag to docker launcher. 
--- integration-tests/UNKNOWN.egg-info/PKG-INFO | 3 +++ .../UNKNOWN.egg-info/SOURCES.txt | 24 +++++++++++++++++++ .../UNKNOWN.egg-info/dependency_links.txt | 1 + .../UNKNOWN.egg-info/top_level.txt | 1 + integration-tests/conftest.py | 4 ++++ 5 files changed, 33 insertions(+) create mode 100644 integration-tests/UNKNOWN.egg-info/PKG-INFO create mode 100644 integration-tests/UNKNOWN.egg-info/SOURCES.txt create mode 100644 integration-tests/UNKNOWN.egg-info/dependency_links.txt create mode 100644 integration-tests/UNKNOWN.egg-info/top_level.txt diff --git a/integration-tests/UNKNOWN.egg-info/PKG-INFO b/integration-tests/UNKNOWN.egg-info/PKG-INFO new file mode 100644 index 00000000000..738605f3103 --- /dev/null +++ b/integration-tests/UNKNOWN.egg-info/PKG-INFO @@ -0,0 +1,3 @@ +Metadata-Version: 2.1 +Name: UNKNOWN +Version: 0.0.0 diff --git a/integration-tests/UNKNOWN.egg-info/SOURCES.txt b/integration-tests/UNKNOWN.egg-info/SOURCES.txt new file mode 100644 index 00000000000..199a8ab20cb --- /dev/null +++ b/integration-tests/UNKNOWN.egg-info/SOURCES.txt @@ -0,0 +1,24 @@ +pyproject.toml +UNKNOWN.egg-info/PKG-INFO +UNKNOWN.egg-info/SOURCES.txt +UNKNOWN.egg-info/dependency_links.txt +UNKNOWN.egg-info/top_level.txt +models/test_bloom_560m.py +models/test_bloom_560m_sharded.py +models/test_flash_awq.py +models/test_flash_awq_sharded.py +models/test_flash_falcon.py +models/test_flash_llama.py +models/test_flash_llama_gptq.py +models/test_flash_mistral.py +models/test_flash_neox.py +models/test_flash_neox_sharded.py +models/test_flash_santacoder.py +models/test_flash_starcoder.py +models/test_flash_starcoder_gptq.py +models/test_idefics.py +models/test_mpt.py +models/test_mt0_base.py +models/test_neox.py +models/test_neox_sharded.py +models/test_t5_sharded.py \ No newline at end of file diff --git a/integration-tests/UNKNOWN.egg-info/dependency_links.txt b/integration-tests/UNKNOWN.egg-info/dependency_links.txt new file mode 100644 index 00000000000..8b137891791 --- /dev/null 
+++ b/integration-tests/UNKNOWN.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/integration-tests/UNKNOWN.egg-info/top_level.txt b/integration-tests/UNKNOWN.egg-info/top_level.txt new file mode 100644 index 00000000000..604f0f2cfbd --- /dev/null +++ b/integration-tests/UNKNOWN.egg-info/top_level.txt @@ -0,0 +1 @@ +models diff --git a/integration-tests/conftest.py b/integration-tests/conftest.py index 8265a045a91..d2216241720 100644 --- a/integration-tests/conftest.py +++ b/integration-tests/conftest.py @@ -273,6 +273,7 @@ def docker_launcher( quantize: Optional[str] = None, trust_remote_code: bool = False, use_flash_attention: bool = True, + dtype: Optional[str] = None ): port = random.randint(8000, 10_000) @@ -283,6 +284,9 @@ def docker_launcher( if quantize is not None: args.append("--quantize") args.append(quantize) + if dtype is not None: + args.append("--dtype") + args.append(dtype) if trust_remote_code: args.append("--trust-remote-code") From 5723454b9e1b2baf9cde476e80e4f010655e1b79 Mon Sep 17 00:00:00 2001 From: Nicolas Patry Date: Tue, 28 Nov 2023 15:15:10 +0000 Subject: [PATCH 4/4] Dummy files. 
--- integration-tests/UNKNOWN.egg-info/PKG-INFO | 3 --- .../UNKNOWN.egg-info/SOURCES.txt | 24 ------------------- .../UNKNOWN.egg-info/dependency_links.txt | 1 - .../UNKNOWN.egg-info/top_level.txt | 1 - 4 files changed, 29 deletions(-) delete mode 100644 integration-tests/UNKNOWN.egg-info/PKG-INFO delete mode 100644 integration-tests/UNKNOWN.egg-info/SOURCES.txt delete mode 100644 integration-tests/UNKNOWN.egg-info/dependency_links.txt delete mode 100644 integration-tests/UNKNOWN.egg-info/top_level.txt diff --git a/integration-tests/UNKNOWN.egg-info/PKG-INFO b/integration-tests/UNKNOWN.egg-info/PKG-INFO deleted file mode 100644 index 738605f3103..00000000000 --- a/integration-tests/UNKNOWN.egg-info/PKG-INFO +++ /dev/null @@ -1,3 +0,0 @@ -Metadata-Version: 2.1 -Name: UNKNOWN -Version: 0.0.0 diff --git a/integration-tests/UNKNOWN.egg-info/SOURCES.txt b/integration-tests/UNKNOWN.egg-info/SOURCES.txt deleted file mode 100644 index 199a8ab20cb..00000000000 --- a/integration-tests/UNKNOWN.egg-info/SOURCES.txt +++ /dev/null @@ -1,24 +0,0 @@ -pyproject.toml -UNKNOWN.egg-info/PKG-INFO -UNKNOWN.egg-info/SOURCES.txt -UNKNOWN.egg-info/dependency_links.txt -UNKNOWN.egg-info/top_level.txt -models/test_bloom_560m.py -models/test_bloom_560m_sharded.py -models/test_flash_awq.py -models/test_flash_awq_sharded.py -models/test_flash_falcon.py -models/test_flash_llama.py -models/test_flash_llama_gptq.py -models/test_flash_mistral.py -models/test_flash_neox.py -models/test_flash_neox_sharded.py -models/test_flash_santacoder.py -models/test_flash_starcoder.py -models/test_flash_starcoder_gptq.py -models/test_idefics.py -models/test_mpt.py -models/test_mt0_base.py -models/test_neox.py -models/test_neox_sharded.py -models/test_t5_sharded.py \ No newline at end of file diff --git a/integration-tests/UNKNOWN.egg-info/dependency_links.txt b/integration-tests/UNKNOWN.egg-info/dependency_links.txt deleted file mode 100644 index 8b137891791..00000000000 --- 
a/integration-tests/UNKNOWN.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/integration-tests/UNKNOWN.egg-info/top_level.txt b/integration-tests/UNKNOWN.egg-info/top_level.txt deleted file mode 100644 index 604f0f2cfbd..00000000000 --- a/integration-tests/UNKNOWN.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -models