Skip to content

Commit

Permalink
hf_transfer in docker variable, jinja2
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelfeil committed Nov 23, 2024
1 parent cf958e9 commit bcb4ef2
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 40 deletions.
19 changes: 11 additions & 8 deletions libs/infinity_emb/Dockerfile.amd_auto
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
FROM rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0 AS base

ENV PYTHONUNBUFFERED=1 \
    \
    # pip
    # NOTE: the legacy value "off" is parsed as boolean *false* by modern pip,
    # which silently leaves caching enabled; use a truthy value to disable it.
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_CREATE="true" \
    POETRY_VIRTUALENVS_IN_PROJECT="true" \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    # huggingface: accelerated model downloads via hf_transfer
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    # extras
    EXTRAS="all" \
    PYTHON="python3.10"
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
Expand Down Expand Up @@ -129,17 +129,20 @@ COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH

# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
# Use a multi-stage build -> production version, with download:
# docker buildx build --target=production-with-download --build-arg MODEL_NAME=mixedbread-ai/mxbai-rerank-xsmall-v1 \
# --build-arg ENGINE=torch -f Dockerfile.nvidia_auto -t infinity-with-mixedbread-ai-mxbai-rerank-xsmall-v1 .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
ARG ENGINE
# fail fast at build time if either required build arg is missing
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi

# bake the chosen model/engine into the runtime env so the ENTRYPOINT
# picks them up without extra CLI flags
ENV INFINITY_MODEL_ID=$MODEL_NAME
ENV INFINITY_ENGINE=$ENGINE
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
Expand Down
19 changes: 11 additions & 8 deletions libs/infinity_emb/Dockerfile.cpu_auto
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
FROM ubuntu:22.04 AS base

ENV PYTHONUNBUFFERED=1 \
    \
    # pip
    # NOTE: the legacy value "off" is parsed as boolean *false* by modern pip,
    # which silently leaves caching enabled; use a truthy value to disable it.
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_CREATE="true" \
    POETRY_VIRTUALENVS_IN_PROJECT="true" \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    # huggingface: accelerated model downloads via hf_transfer
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    # extras
    EXTRAS="all" \
    PYTHON="python3.11"
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
Expand Down Expand Up @@ -97,17 +97,20 @@ COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH

# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
# Use a multi-stage build -> production version, with download:
# docker buildx build --target=production-with-download --build-arg MODEL_NAME=mixedbread-ai/mxbai-rerank-xsmall-v1 \
# --build-arg ENGINE=torch -f Dockerfile.nvidia_auto -t infinity-with-mixedbread-ai-mxbai-rerank-xsmall-v1 .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
ARG ENGINE
# fail fast at build time if either required build arg is missing
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi

# bake the chosen model/engine into the runtime env so the ENTRYPOINT
# picks them up without extra CLI flags
ENV INFINITY_MODEL_ID=$MODEL_NAME
ENV INFINITY_ENGINE=$ENGINE
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
Expand Down
19 changes: 11 additions & 8 deletions libs/infinity_emb/Dockerfile.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
FROM {{ base_image }} AS base

ENV PYTHONUNBUFFERED=1 \
    \
    # pip
    # NOTE: the legacy value "off" is parsed as boolean *false* by modern pip,
    # which silently leaves caching enabled; use a truthy value to disable it.
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_CREATE="{{poetry_virtualenvs_create | default('true')}}" \
    POETRY_VIRTUALENVS_IN_PROJECT="{{poetry_virtualenvs_in_project | default('true')}}" \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    # huggingface: accelerated model downloads via hf_transfer
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    # extras
    EXTRAS="{{poetry_extras | default('all')}}" \
    PYTHON="{{python_version | default('python3.11')}}"
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
Expand Down Expand Up @@ -83,17 +83,20 @@ COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH

# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
# Use a multi-stage build -> production version, with download:
# docker buildx build --target=production-with-download --build-arg MODEL_NAME=mixedbread-ai/mxbai-rerank-xsmall-v1 \
# --build-arg ENGINE=torch -f Dockerfile.nvidia_auto -t infinity-with-mixedbread-ai-mxbai-rerank-xsmall-v1 .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
ARG ENGINE
# fail fast at build time if either required build arg is missing
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi

# bake the chosen model/engine into the runtime env so the ENTRYPOINT
# picks them up without extra CLI flags
ENV INFINITY_MODEL_ID=$MODEL_NAME
ENV INFINITY_ENGINE=$ENGINE
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
Expand Down
19 changes: 11 additions & 8 deletions libs/infinity_emb/Dockerfile.nvidia_auto
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
FROM nvidia/cuda:12.1.1-base-ubuntu22.04 AS base

ENV PYTHONUNBUFFERED=1 \
    \
    # pip
    # NOTE: the legacy value "off" is parsed as boolean *false* by modern pip,
    # which silently leaves caching enabled; use a truthy value to disable it.
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_CREATE="true" \
    POETRY_VIRTUALENVS_IN_PROJECT="true" \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    # huggingface: accelerated model downloads via hf_transfer
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    # extras
    EXTRAS="all" \
    PYTHON="python3.11"
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
Expand Down Expand Up @@ -83,17 +83,20 @@ COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH

# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
# Use a multi-stage build -> production version, with download:
# docker buildx build --target=production-with-download --build-arg MODEL_NAME=mixedbread-ai/mxbai-rerank-xsmall-v1 \
# --build-arg ENGINE=torch -f Dockerfile.nvidia_auto -t infinity-with-mixedbread-ai-mxbai-rerank-xsmall-v1 .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
ARG ENGINE
# fail fast at build time if either required build arg is missing
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi

# bake the chosen model/engine into the runtime env so the ENTRYPOINT
# picks them up without extra CLI flags
ENV INFINITY_MODEL_ID=$MODEL_NAME
ENV INFINITY_ENGINE=$ENGINE
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
Expand Down
19 changes: 11 additions & 8 deletions libs/infinity_emb/Dockerfile.trt_onnx_auto
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04 AS base

ENV PYTHONUNBUFFERED=1 \
    \
    # pip
    # NOTE: the legacy value "off" is parsed as boolean *false* by modern pip,
    # which silently leaves caching enabled; use a truthy value to disable it.
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_CREATE="true" \
    POETRY_VIRTUALENVS_IN_PROJECT="true" \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    # huggingface: accelerated model downloads via hf_transfer
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    # extras
    EXTRAS="all onnxruntime-gpu" \
    PYTHON="python3.10"
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
Expand Down Expand Up @@ -90,17 +90,20 @@ COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH

# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
# Use a multi-stage build -> production version, with download:
# docker buildx build --target=production-with-download --build-arg MODEL_NAME=mixedbread-ai/mxbai-rerank-xsmall-v1 \
# --build-arg ENGINE=torch -f Dockerfile.nvidia_auto -t infinity-with-mixedbread-ai-mxbai-rerank-xsmall-v1 .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
ARG ENGINE
# fail fast at build time if either required build arg is missing
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi

# bake the chosen model/engine into the runtime env so the ENTRYPOINT
# picks them up without extra CLI flags
ENV INFINITY_MODEL_ID=$MODEL_NAME
ENV INFINITY_ENGINE=$ENGINE
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
Expand Down
126 changes: 126 additions & 0 deletions subsribe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import zmq
import multiprocessing
import time
import psutil # For memory checks

def get_zmq_socket(context: zmq.Context, socket_type: zmq.SocketType, endpoint: str):
    """Create, configure, and wire up a ZMQ socket on ``endpoint``.

    PUB/SUB sockets get an unbounded high-water mark and, on hosts with
    plenty of RAM (>32 GiB total and >16 GiB available), a 0.5 GiB kernel
    send/receive buffer; otherwise the OS default buffer (-1) is used.
    PUB and REP sockets bind to the endpoint; SUB and REQ sockets connect.

    Raises:
        ValueError: if ``socket_type`` is not PUB, SUB, REP, or REQ.
    """
    gib = 1024 ** 3
    vm = psutil.virtual_memory()
    # Only claim a large kernel buffer when the host clearly has RAM to spare.
    plenty_of_ram = (vm.total / gib > 32) and (vm.available / gib > 16)
    buf_size = gib // 2 if plenty_of_ram else -1  # -1 -> system default

    socket = context.socket(socket_type)
    if socket_type == zmq.PUB:
        socket.setsockopt(zmq.SNDHWM, 0)  # unbounded outgoing message queue
        socket.setsockopt(zmq.SNDBUF, buf_size)
        socket.bind(endpoint)
    elif socket_type == zmq.SUB:
        socket.setsockopt(zmq.RCVHWM, 0)  # unbounded incoming message queue
        socket.setsockopt(zmq.RCVBUF, buf_size)
        socket.connect(endpoint)
    elif socket_type == zmq.REP:
        socket.bind(endpoint)
    elif socket_type == zmq.REQ:
        socket.connect(endpoint)
    else:
        raise ValueError(f"Unsupported socket type: {socket_type}")

    return socket

def subscriber_process(ident):
    """Run a SUB worker: handshake with the publisher, then print every
    message published on topic ``ident`` until a STOP payload arrives.

    Args:
        ident: small integer topic id (must fit in a single byte).
    """
    context = zmq.Context()
    # Synchronization socket to signal readiness
    sync_socket = get_zmq_socket(context, zmq.REQ, "ipc://sync.ipc")

    # Subscriber socket
    socket = get_zmq_socket(context, zmq.SUB, "ipc://pubsub.ipc")
    # Subscribe to messages with the given ident as bytes
    topic_filter = ident.to_bytes(1, byteorder='big')
    socket.setsockopt(zmq.SUBSCRIBE, topic_filter)

    # Handshake so the publisher knows our subscription is in place
    # before it starts sending (avoids the PUB/SUB slow-joiner drop).
    sync_socket.send(b'READY')
    sync_socket.recv()  # Wait for acknowledgment

    running = True
    while running:
        try:
            # Receive multipart message: [topic][payload]
            topic = socket.recv()
            payload = socket.recv_pyobj()
            topic_int = int.from_bytes(topic, 'big')
            print(f"Subscriber {ident} received on topic {topic_int}: {payload}")
            # Check for stop command
            if payload.get('command') == 'STOP':
                running = False
        except Exception as e:
            print(f"Subscriber {ident} exception: {e}")
            running = False
    # BUG FIX: close *both* sockets before terminating the context.
    # zmq.Context.term() blocks until every socket created from the context
    # is closed, so leaving sync_socket open would hang this process here.
    sync_socket.close()
    socket.close()
    context.term()

def spawn_one_subscriber(ident):
    """Fork a subscriber_process worker for topic ``ident`` and return the
    already-started multiprocessing.Process handle."""
    worker = multiprocessing.Process(target=subscriber_process, args=(ident,))
    worker.start()
    return worker

def main():
    """Publisher side of the demo: spawn two subscribers, synchronize with
    them, stream five messages to each topic, broadcast STOP, and shut down
    cleanly once both workers have exited.
    """
    context = zmq.Context()

    # Synchronization socket to receive readiness signals
    sync_socket = get_zmq_socket(context, zmq.REP, "ipc://sync.ipc")

    # Publisher socket
    socket = get_zmq_socket(context, zmq.PUB, "ipc://pubsub.ipc")

    # Spawn subscribers with integer identifiers 1 and 2
    topic_ids = (1, 2)
    workers = [spawn_one_subscriber(ident) for ident in topic_ids]

    # Wait for every subscriber to signal readiness before publishing
    # (avoids the PUB/SUB slow-joiner problem of silently dropped messages).
    for _ in workers:
        sync_socket.recv()
        print("Received subscriber ready signal")
        sync_socket.send(b'')  # Send acknowledgment

    # Allow some time for subscribers to process the sync messages
    time.sleep(1)

    # Build each single-byte topic frame once instead of on every iteration.
    topics = {ident: ident.to_bytes(1, byteorder='big') for ident in topic_ids}

    def _publish(ident, payload):
        # One two-frame message: [topic][pickled payload].
        print(f"Publishing to topic {ident}: {payload}")
        socket.send_multipart([topics[ident]], zmq.SNDMORE)
        socket.send_pyobj(payload)

    # Send five data messages to each subscriber, alternating topics.
    data = {1: b'\x00\x01\x02', 2: b'\x03\x04\x05'}
    for i in range(5):
        for ident in topic_ids:
            _publish(ident, {'message': f"Message {i} to subscriber {ident}",
                             'data': data[ident]})
        time.sleep(0.5)

    # Send stop commands to subscribers
    print("Sending stop commands")
    stop_msg = {'command': 'STOP'}
    for ident in topic_ids:
        socket.send_multipart([topics[ident]], zmq.SNDMORE)
        socket.send_pyobj(stop_msg)

    # Wait for subscribers to terminate before tearing down the context
    for worker in workers:
        worker.join()

    socket.close()
    sync_socket.close()
    context.term()

# Entry-point guard: run the demo only when executed as a script. This also
# keeps multiprocessing's "spawn" start method (which re-imports this module
# in child processes) from re-running main() in every worker.
if __name__ == "__main__":
    main()

0 comments on commit bcb4ef2

Please sign in to comment.