From a72f339c79728c12e66629ac179d735d586ec10c Mon Sep 17 00:00:00 2001 From: drbh Date: Mon, 16 Dec 2024 16:12:34 -0500 Subject: [PATCH] fix: lint backend and doc files (#2850) --- .devcontainer/Dockerfile_trtllm | 2 +- backends/trtllm/csrc/backend.hpp | 2 +- backends/trtllm/csrc/ffi.hpp | 2 +- backends/trtllm/csrc/hardware.hpp | 2 +- backends/trtllm/tests/test_backend.cpp | 2 +- backends/trtllm/tests/test_hardware.cpp | 2 +- docs/source/backends/trtllm.md | 8 ++++---- docs/source/multi_backend_support.md | 12 ++++++------ 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.devcontainer/Dockerfile_trtllm b/.devcontainer/Dockerfile_trtllm index 21b7114ce03..239a7bf8c2c 100644 --- a/.devcontainer/Dockerfile_trtllm +++ b/.devcontainer/Dockerfile_trtllm @@ -72,4 +72,4 @@ RUN cargo install cargo-chef COPY --from=trt-builder /usr/local/tensorrt /usr/local/tensorrt COPY --from=mpi-builder /usr/local/mpi /usr/local/mpi -ENV MPI_HOME=/usr/local/mpi \ No newline at end of file +ENV MPI_HOME=/usr/local/mpi diff --git a/backends/trtllm/csrc/backend.hpp b/backends/trtllm/csrc/backend.hpp index f49c437a2b2..40b44a842b3 100644 --- a/backends/trtllm/csrc/backend.hpp +++ b/backends/trtllm/csrc/backend.hpp @@ -228,4 +228,4 @@ struct fmt::formatter : f } }; -#endif \ No newline at end of file +#endif diff --git a/backends/trtllm/csrc/ffi.hpp b/backends/trtllm/csrc/ffi.hpp index de2333afe37..d0342d4bb38 100644 --- a/backends/trtllm/csrc/ffi.hpp +++ b/backends/trtllm/csrc/ffi.hpp @@ -159,4 +159,4 @@ namespace huggingface::tgi::backends::trtllm { ); } } -#endif \ No newline at end of file +#endif diff --git a/backends/trtllm/csrc/hardware.hpp b/backends/trtllm/csrc/hardware.hpp index 8e5fa696dbb..abfb4afd51d 100644 --- a/backends/trtllm/csrc/hardware.hpp +++ b/backends/trtllm/csrc/hardware.hpp @@ -78,4 +78,4 @@ namespace huggingface::tgi::hardware::cuda { [[nodiscard]] constexpr bool is_at_least_hopper() const { return is_at_least(HOPPER); } }; } -#endif \ No newline at end of file +#endif diff --git a/backends/trtllm/tests/test_backend.cpp b/backends/trtllm/tests/test_backend.cpp index ae097405bc4..14d92b75434 100644 --- a/backends/trtllm/tests/test_backend.cpp +++ b/backends/trtllm/tests/test_backend.cpp @@ -149,4 +149,4 @@ TEST_CASE("sampling_params_t to tle::SamplingConfig", "[backend_t]") REQUIRE(config.getTemperature().has_value()); REQUIRE_THAT(*config.getTemperature(), Catch::Matchers::WithinAbs(params.temperature, 1e-6f)); -} \ No newline at end of file +} diff --git a/backends/trtllm/tests/test_hardware.cpp b/backends/trtllm/tests/test_hardware.cpp index 4cb7b562087..e14f1f357f4 100644 --- a/backends/trtllm/tests/test_hardware.cpp +++ b/backends/trtllm/tests/test_hardware.cpp @@ -79,4 +79,4 @@ TEST_CASE("is_at_least") { REQUIRE(HOPPER_CAPABILITIES.is_at_least(AMPERE)); REQUIRE(HOPPER_CAPABILITIES.is_at_least(ADA_LOVELACE)); REQUIRE(HOPPER_CAPABILITIES.is_at_least(HOPPER)); -} \ No newline at end of file +} diff --git a/docs/source/backends/trtllm.md b/docs/source/backends/trtllm.md index 8eb37180c7d..be6416b15e5 100644 --- a/docs/source/backends/trtllm.md +++ b/docs/source/backends/trtllm.md @@ -17,7 +17,7 @@ supported. You can use [Optimum-NVIDIA](https://github.com/huggingface/optimum-nvidia) to compile engines for the models you want to use. -```bash +```bash MODEL_NAME="meta-llama/Llama-3.1-8B-Instruct" # Install huggingface_cli @@ -32,7 +32,7 @@ mkdir -p /tmp/models/$MODEL_NAME # Create a directory to store the compiled engine mkdir -p /tmp/engines/$MODEL_NAME -# Download the model +# Download the model HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download --local-dir /tmp/models/$MODEL_NAME $MODEL_NAME # Compile the engine using Optimum-NVIDIA @@ -69,7 +69,7 @@ docker run \ -e MODEL=$MODEL_NAME \ -e PORT=3000 \ -e HF_TOKEN='hf_XXX' \ - -v /tmp/engines/$MODEL_NAME:/data \ + -v /tmp/engines/$MODEL_NAME:/data \ ghcr.io/huggingface/text-generation-inference:latest-trtllm \ --executor-worker executorWorker \ --model-id /data/$MODEL_NAME @@ -78,4 +78,4 @@ docker run \ ## Development To develop TRTLLM backend, you can use [dev containers](https://containers.dev/) located in -`.devcontainer` directory. \ No newline at end of file +`.devcontainer` directory. diff --git a/docs/source/multi_backend_support.md b/docs/source/multi_backend_support.md index 5899e4b77d4..c4df15bc2ca 100644 --- a/docs/source/multi_backend_support.md +++ b/docs/source/multi_backend_support.md @@ -1,13 +1,13 @@ # Multi-backend support TGI (Text Generation Inference) offers flexibility by supporting multiple backends for serving large language models (LLMs). -With multi-backend support, you can choose the backend that best suits your needs, -whether you prioritize performance, ease of use, or compatibility with specific hardware. API interaction with +With multi-backend support, you can choose the backend that best suits your needs, +whether you prioritize performance, ease of use, or compatibility with specific hardware. API interaction with TGI remains consistent across backends, allowing you to switch between them seamlessly. **Supported backends:** -* **TGI CUDA backend**: This high-performance backend is optimized for NVIDIA GPUs and serves as the default option +* **TGI CUDA backend**: This high-performance backend is optimized for NVIDIA GPUs and serves as the default option within TGI. Developed in-house, it boasts numerous optimizations and is used in production by various projects, including those by Hugging Face. -* **[TGI TRTLLM backend](./backends/trtllm)**: This backend leverages NVIDIA's TensorRT library to accelerate LLM inference. - It utilizes specialized optimizations and custom kernels for enhanced performance. - However, it requires a model-specific compilation step for each GPU architecture. \ No newline at end of file +* **[TGI TRTLLM backend](./backends/trtllm)**: This backend leverages NVIDIA's TensorRT library to accelerate LLM inference. + It utilizes specialized optimizations and custom kernels for enhanced performance. + However, it requires a model-specific compilation step for each GPU architecture.