diff --git a/containers/tei/cpu/1.5.1/Dockerfile b/containers/tei/cpu/1.5.1/Dockerfile
new file mode 100644
index 00000000..dbb95c0d
--- /dev/null
+++ b/containers/tei/cpu/1.5.1/Dockerfile
@@ -0,0 +1,93 @@
+# Fetch and extract the TEI (text-embeddings-inference) v1.5.1 sources into /tei
+FROM alpine:3.20 AS tei
+
+RUN mkdir -p /tei
+ADD https://github.com/huggingface/text-embeddings-inference/archive/refs/tags/v1.5.1.tar.gz /tei/sources.tar.gz
+RUN tar -C /tei -xf /tei/sources.tar.gz --strip-components=1
+
+# Build cargo components (adapted from TEI original Dockerfile)
+FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef
+WORKDIR /usr/src
+
+ENV SCCACHE=0.5.4
+ENV RUSTC_WRAPPER=/usr/local/bin/sccache
+
+# Download and configure sccache (used as the rustc wrapper via RUSTC_WRAPPER)
+RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
+    chmod +x /usr/local/bin/sccache
+
+FROM chef AS planner
+
+COPY --from=tei /tei/backends backends
+COPY --from=tei /tei/core core
+COPY --from=tei /tei/router router
+COPY --from=tei /tei/Cargo.toml ./
+COPY --from=tei /tei/Cargo.lock ./
+
+RUN cargo chef prepare --recipe-path recipe.json
+
+FROM chef AS builder
+
+COPY --from=planner /usr/src/recipe.json recipe.json
+
+RUN cargo chef cook --release --features ort --no-default-features --recipe-path recipe.json && sccache -s
+
+COPY --from=tei /tei/backends backends
+COPY --from=tei /tei/core core
+COPY --from=tei /tei/router router
+COPY --from=tei /tei/Cargo.toml ./
+COPY --from=tei /tei/Cargo.lock ./
+
+FROM builder AS http-builder
+
+RUN cargo build --release --bin text-embeddings-router -F google -F ort -F http --no-default-features && sccache -s
+
+FROM builder AS grpc-builder
+
+RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \
+    curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/$PROTOC_ZIP && \
+    unzip -o $PROTOC_ZIP -d /usr/local bin/protoc && \
+    unzip -o $PROTOC_ZIP -d /usr/local 'include/*' && \
+    rm -f $PROTOC_ZIP
+
+COPY --from=tei /tei/proto proto
+
+RUN cargo build --release --bin text-embeddings-router -F google -F grpc -F ort --no-default-features && sccache -s
+
+FROM debian:bookworm-slim AS base
+
+ENV HUGGINGFACE_HUB_CACHE=/tmp \
+    PORT=8080
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl-dev \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install the Google Cloud CLI (the entrypoint calls `gcloud storage`) in a single layer
+RUN apt-get update -y && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg curl && \
+    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
+    | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
+    curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
+    | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \
+    apt-get update -y && \
+    apt-get install google-cloud-sdk -y && rm -rf /var/lib/apt/lists/*
+
+# Copy the custom entrypoint for Google Cloud (downloads the model from GCS when AIP_STORAGE_URI is set)
+COPY --chmod=775 containers/tei/cpu/1.5.1/entrypoint.sh entrypoint.sh
+
+FROM base AS grpc
+
+COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router
+
+ENTRYPOINT ["./entrypoint.sh"]
+CMD ["--json-output"]
+
+FROM base AS http
+
+COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router
+
+ENTRYPOINT ["./entrypoint.sh"]
+CMD ["--json-output"]
diff --git a/containers/tei/cpu/1.5.1/entrypoint.sh b/containers/tei/cpu/1.5.1/entrypoint.sh
new file mode 100644
index 00000000..e25cedff
--- /dev/null
+++ b/containers/tei/cpu/1.5.1/entrypoint.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# Check if AIP_STORAGE_URI starts with "gs://"; if so, fetch the model from GCS first
+if [[ $AIP_STORAGE_URI == gs://* ]]; then
+    echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
+    echo "AIP_STORAGE_URI: $AIP_STORAGE_URI"
+
+    # Define the target directory
+    TARGET_DIR="/tmp/model"
+    mkdir -p "$TARGET_DIR"
+
+    # Use `gcloud storage` to copy the content from GCS to the target directory
+    echo "Running: gcloud storage cp $AIP_STORAGE_URI/* $TARGET_DIR --recursive"
+    gcloud storage cp "$AIP_STORAGE_URI/*" "$TARGET_DIR" --recursive
+
+    # Check if the gcloud command was successful
+    if [ $? -eq 0 ]; then
+        echo "Model downloaded successfully to ${TARGET_DIR}."
+        # Update MODEL_ID to point to the local directory
+        echo "Updating MODEL_ID to point to the local directory."
+        export MODEL_ID="$TARGET_DIR"
+    else
+        echo "Failed to download model from GCS."
+        exit 1
+    fi
+fi
+
+ldconfig 2>/dev/null || echo "unable to refresh ld cache, not a big deal in most cases"
+
+exec text-embeddings-router "$@"
diff --git a/containers/tei/gpu/1.5.1/Dockerfile b/containers/tei/gpu/1.5.1/Dockerfile
new file mode 100644
index 00000000..f20c3e46
--- /dev/null
+++ b/containers/tei/gpu/1.5.1/Dockerfile
@@ -0,0 +1,113 @@
+# Fetch and extract the TEI (text-embeddings-inference) v1.5.1 sources into /tei
+FROM alpine:3.20 AS tei
+
+RUN mkdir -p /tei
+ADD https://github.com/huggingface/text-embeddings-inference/archive/refs/tags/v1.5.1.tar.gz /tei/sources.tar.gz
+RUN tar -C /tei -xf /tei/sources.tar.gz --strip-components=1
+
+# Build cargo components (adapted from TEI original Dockerfile)
+FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder
+
+ENV SCCACHE=0.5.4
+ENV RUSTC_WRAPPER=/usr/local/bin/sccache
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    curl \
+    libssl-dev \
+    pkg-config \
+    && rm -rf /var/lib/apt/lists/*
+
+# Download and configure sccache (used as the rustc wrapper via RUSTC_WRAPPER)
+RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
+    chmod +x /usr/local/bin/sccache
+
+RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
+RUN cargo install cargo-chef --locked
+
+FROM base-builder AS planner
+
+WORKDIR /usr/src
+
+COPY --from=tei /tei/backends backends
+COPY --from=tei /tei/core core
+COPY --from=tei /tei/router router
+COPY --from=tei /tei/Cargo.toml ./
+COPY --from=tei /tei/Cargo.lock ./
+
+RUN cargo chef prepare --recipe-path recipe.json
+
+FROM base-builder AS builder
+
+WORKDIR /usr/src
+
+COPY --from=planner /usr/src/recipe.json recipe.json
+
+RUN cargo chef cook --release --features google --recipe-path recipe.json && sccache -s
+
+FROM builder AS builder-75
+
+RUN CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --recipe-path recipe.json && sccache -s
+
+COPY --from=tei /tei/backends backends
+COPY --from=tei /tei/core core
+COPY --from=tei /tei/router router
+COPY --from=tei /tei/Cargo.toml ./
+COPY --from=tei /tei/Cargo.lock ./
+
+RUN CUDA_COMPUTE_CAP=75 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F google && sccache -s
+
+FROM builder AS builder-80
+
+RUN CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s
+
+COPY --from=tei /tei/backends backends
+COPY --from=tei /tei/core core
+COPY --from=tei /tei/router router
+COPY --from=tei /tei/Cargo.toml ./
+COPY --from=tei /tei/Cargo.lock ./
+
+RUN CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s
+
+FROM builder AS builder-90
+
+RUN CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --recipe-path recipe.json && sccache -s
+
+COPY --from=tei /tei/backends backends
+COPY --from=tei /tei/core core
+COPY --from=tei /tei/router router
+COPY --from=tei /tei/Cargo.toml ./
+COPY --from=tei /tei/Cargo.lock ./
+
+RUN CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F google && sccache -s
+
+FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 AS base
+
+ENV HUGGINGFACE_HUB_CACHE=/tmp \
+    PORT=8080 \
+    USE_FLASH_ATTENTION=True
+
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
+    ca-certificates \
+    libssl-dev \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY --from=builder-75 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-75
+COPY --from=builder-80 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-80
+COPY --from=builder-90 /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router-90
+
+# Install the Google Cloud CLI (the entrypoint calls `gcloud storage`) in a single layer
+RUN apt-get update -y && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends apt-transport-https ca-certificates gnupg curl && \
+    echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \
+    | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \
+    curl -fsSL https://packages.cloud.google.com/apt/doc/apt-key.gpg \
+    | gpg --dearmor -o /usr/share/keyrings/cloud.google.gpg && \
+    apt-get update -y && \
+    apt-get install google-cloud-sdk -y && rm -rf /var/lib/apt/lists/*
+
+# Copy the custom entrypoint for Google Cloud (GCS download + per-compute-capability binary dispatch)
+COPY --chmod=775 containers/tei/gpu/1.5.1/entrypoint.sh entrypoint.sh
+ENTRYPOINT ["./entrypoint.sh"]
+CMD ["--json-output"]
diff --git a/containers/tei/gpu/1.5.1/entrypoint.sh b/containers/tei/gpu/1.5.1/entrypoint.sh
new file mode 100644
index 00000000..53eb8d84
--- /dev/null
+++ b/containers/tei/gpu/1.5.1/entrypoint.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+# Check if AIP_STORAGE_URI starts with "gs://"; if so, fetch the model from GCS first
+if [[ $AIP_STORAGE_URI == gs://* ]]; then
+    echo "AIP_STORAGE_URI set and starts with 'gs://', proceeding to download from GCS."
+    echo "AIP_STORAGE_URI: $AIP_STORAGE_URI"
+
+    # Define the target directory
+    TARGET_DIR="/tmp/model"
+    mkdir -p "$TARGET_DIR"
+
+    # Use `gcloud storage` to copy the content from GCS to the target directory
+    echo "Running: gcloud storage cp $AIP_STORAGE_URI/* $TARGET_DIR --recursive"
+    gcloud storage cp "$AIP_STORAGE_URI/*" "$TARGET_DIR" --recursive
+
+    # Check if the gcloud command was successful
+    if [ $? -eq 0 ]; then
+        echo "Model downloaded successfully to ${TARGET_DIR}."
+        # Update MODEL_ID to point to the local directory
+        echo "Updating MODEL_ID to point to the local directory."
+        export MODEL_ID="$TARGET_DIR"
+    else
+        echo "Failed to download model from GCS."
+        exit 1
+    fi
+fi
+
+ldconfig 2>/dev/null || echo "unable to refresh ld cache, not a big deal in most cases"
+
+# Below is the original `cuda-all-entrypoint.sh` script (test operands quoted, `-a` replaced by `&&`).
+# Reference: https://github.com/huggingface/text-embeddings-inference/blob/v1.5.1/cuda-all-entrypoint.sh
+if ! command -v nvidia-smi &>/dev/null; then
+    echo "Error: 'nvidia-smi' command not found."
+    exit 1
+fi
+
+compute_cap=$(nvidia-smi --query-gpu=compute_cap --format=csv | sed -n '2p' | sed 's/\.//g')
+
+if [ "${compute_cap}" -eq 75 ]; then
+    exec text-embeddings-router-75 "$@"
+elif [ "${compute_cap}" -ge 80 ] && [ "${compute_cap}" -lt 90 ]; then
+    exec text-embeddings-router-80 "$@"
+elif [ "${compute_cap}" -eq 90 ]; then
+    exec text-embeddings-router-90 "$@"
+else
+    echo "cuda compute cap ${compute_cap} is not supported"
+    exit 1
+fi