Skip to content

Commit

Permalink
hf_transfer in docker variable, jinja2
Browse files Browse the repository at this point in the history
  • Loading branch information
michaelfeil committed Nov 23, 2024
1 parent cf958e9 commit bcb4ef2
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 40 deletions.
19 changes: 11 additions & 8 deletions libs/infinity_emb/Dockerfile.amd_auto
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
FROM rocm/pytorch:rocm6.2.3_ubuntu22.04_py3.10_pytorch_release_2.3.0 AS base

ENV PYTHONUNBUFFERED=1 \
    \
    # pip
    # NOTE: the legacy value "off" is parsed as boolean *false* by modern pip,
    # which silently leaves caching enabled; use a truthy value to disable it.
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_CREATE="true" \
    POETRY_VIRTUALENVS_IN_PROJECT="true" \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    # huggingface: accelerated model downloads via hf_transfer
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    # extras
    EXTRAS="all" \
    PYTHON="python3.10"
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
Expand Down Expand Up @@ -129,17 +129,20 @@ COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH

# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
# Use a multi-stage build -> production version, with download:
# docker buildx build --target=production-with-download --build-arg MODEL_NAME=mixedbread-ai/mxbai-rerank-xsmall-v1 \
# --build-arg ENGINE=torch -f Dockerfile.nvidia_auto -t infinity-with-mixedbread-ai-mxbai-rerank-xsmall-v1 .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
ARG ENGINE
# fail fast at build time if either required build arg is missing
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi

# bake the chosen model/engine into the runtime env so the ENTRYPOINT
# picks them up without extra CLI flags
ENV INFINITY_MODEL_ID=$MODEL_NAME
ENV INFINITY_ENGINE=$ENGINE
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
Expand Down
19 changes: 11 additions & 8 deletions libs/infinity_emb/Dockerfile.cpu_auto
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
FROM ubuntu:22.04 AS base

ENV PYTHONUNBUFFERED=1 \
    \
    # pip
    # NOTE: the legacy value "off" is parsed as boolean *false* by modern pip,
    # which silently leaves caching enabled; use a truthy value to disable it.
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_CREATE="true" \
    POETRY_VIRTUALENVS_IN_PROJECT="true" \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    # huggingface: accelerated model downloads via hf_transfer
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    # extras
    EXTRAS="all" \
    PYTHON="python3.11"
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
Expand Down Expand Up @@ -97,17 +97,20 @@ COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH

# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
# Use a multi-stage build -> production version, with download:
# docker buildx build --target=production-with-download --build-arg MODEL_NAME=mixedbread-ai/mxbai-rerank-xsmall-v1 \
# --build-arg ENGINE=torch -f Dockerfile.nvidia_auto -t infinity-with-mixedbread-ai-mxbai-rerank-xsmall-v1 .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
ARG ENGINE
# fail fast at build time if either required build arg is missing
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi

# bake the chosen model/engine into the runtime env so the ENTRYPOINT
# picks them up without extra CLI flags
ENV INFINITY_MODEL_ID=$MODEL_NAME
ENV INFINITY_ENGINE=$ENGINE
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
Expand Down
19 changes: 11 additions & 8 deletions libs/infinity_emb/Dockerfile.jinja2
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
FROM {{ base_image }} AS base

ENV PYTHONUNBUFFERED=1 \
    \
    # pip
    # NOTE: the legacy value "off" is parsed as boolean *false* by modern pip,
    # which silently leaves caching enabled; use a truthy value to disable it.
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_CREATE="{{poetry_virtualenvs_create | default('true')}}" \
    POETRY_VIRTUALENVS_IN_PROJECT="{{poetry_virtualenvs_in_project | default('true')}}" \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    # huggingface: accelerated model downloads via hf_transfer
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    # extras
    EXTRAS="{{poetry_extras | default('all')}}" \
    PYTHON="{{python_version | default('python3.11')}}"
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
Expand Down Expand Up @@ -83,17 +83,20 @@ COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH

# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
# Use a multi-stage build -> production version, with download:
# docker buildx build --target=production-with-download --build-arg MODEL_NAME=mixedbread-ai/mxbai-rerank-xsmall-v1 \
# --build-arg ENGINE=torch -f Dockerfile.nvidia_auto -t infinity-with-mixedbread-ai-mxbai-rerank-xsmall-v1 .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
ARG ENGINE
# fail fast at build time if either required build arg is missing
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi

# bake the chosen model/engine into the runtime env so the ENTRYPOINT
# picks them up without extra CLI flags
ENV INFINITY_MODEL_ID=$MODEL_NAME
ENV INFINITY_ENGINE=$ENGINE
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
Expand Down
19 changes: 11 additions & 8 deletions libs/infinity_emb/Dockerfile.nvidia_auto
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
FROM nvidia/cuda:12.1.1-base-ubuntu22.04 AS base

ENV PYTHONUNBUFFERED=1 \
    \
    # pip
    # NOTE: the legacy value "off" is parsed as boolean *false* by modern pip,
    # which silently leaves caching enabled; use a truthy value to disable it.
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_CREATE="true" \
    POETRY_VIRTUALENVS_IN_PROJECT="true" \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    # huggingface: accelerated model downloads via hf_transfer
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    # extras
    EXTRAS="all" \
    PYTHON="python3.11"
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
Expand Down Expand Up @@ -83,17 +83,20 @@ COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH

# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
# Use a multi-stage build -> production version, with download:
# docker buildx build --target=production-with-download --build-arg MODEL_NAME=mixedbread-ai/mxbai-rerank-xsmall-v1 \
# --build-arg ENGINE=torch -f Dockerfile.nvidia_auto -t infinity-with-mixedbread-ai-mxbai-rerank-xsmall-v1 .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
ARG ENGINE
# fail fast at build time if either required build arg is missing
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi

# bake the chosen model/engine into the runtime env so the ENTRYPOINT
# picks them up without extra CLI flags
ENV INFINITY_MODEL_ID=$MODEL_NAME
ENV INFINITY_ENGINE=$ENGINE
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
Expand Down
19 changes: 11 additions & 8 deletions libs/infinity_emb/Dockerfile.trt_onnx_auto
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,18 @@
FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04 AS base

ENV PYTHONUNBUFFERED=1 \
    \
    # pip
    # NOTE: the legacy value "off" is parsed as boolean *false* by modern pip,
    # which silently leaves caching enabled; use a truthy value to disable it.
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PIP_DEFAULT_TIMEOUT=100 \
    \
    # make poetry create the virtual environment in the project's root
    # it gets named `.venv`
    POETRY_VIRTUALENVS_CREATE="true" \
    POETRY_VIRTUALENVS_IN_PROJECT="true" \
    # do not ask any interactive question
    POETRY_NO_INTERACTION=1 \
    # huggingface: accelerated model downloads via hf_transfer
    HF_HUB_ENABLE_HF_TRANSFER=1 \
    # extras
    EXTRAS="all onnxruntime-gpu" \
    PYTHON="python3.10"
RUN apt-get update && apt-get install --no-install-recommends -y build-essential python3-dev libsndfile1 $PYTHON-venv $PYTHON curl
Expand Down Expand Up @@ -90,17 +90,20 @@ COPY --from=testing /app/test_results.txt /app/test_results.txt
ENV HF_HOME=/app/.cache/huggingface
ENV PATH=/app/.venv/bin:$PATH

# Use a multi-stage build -> production version, with download
# docker buildx build --target=production-with-download \
# --build-arg MODEL_NAME=BAAI/bge-small-en-v1.5 --build-arg ENGINE=torch -f Dockerfile -t infinity-BAAI-small .
# Use a multi-stage build -> production version, with download:
# docker buildx build --target=production-with-download --build-arg MODEL_NAME=mixedbread-ai/mxbai-rerank-xsmall-v1 \
# --build-arg ENGINE=torch -f Dockerfile.nvidia_auto -t infinity-with-mixedbread-ai-mxbai-rerank-xsmall-v1 .
FROM tested-builder AS production-with-download
# collect model name and engine from build args
ARG MODEL_NAME
ARG ENGINE
# fail fast at build time if either required build arg is missing
RUN if [ -z "${MODEL_NAME}" ]; then echo "Error: Build argument MODEL_NAME not set." && exit 1; fi
RUN if [ -z "${ENGINE}" ]; then echo "Error: Build argument ENGINE not set." && exit 1; fi

# bake the chosen model/engine into the runtime env so the ENTRYPOINT
# picks them up without extra CLI flags
ENV INFINITY_MODEL_ID=$MODEL_NAME
ENV INFINITY_ENGINE=$ENGINE
# will exit with 3 if model is downloaded # TODO: better exit code
RUN infinity_emb v2 --preload-only || [ $? -eq 3 ]
ENTRYPOINT ["infinity_emb"]

# Use a multi-stage build -> production version
Expand Down
126 changes: 126 additions & 0 deletions subsribe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import zmq
import multiprocessing
import time
import psutil # For memory checks

def get_zmq_socket(context: zmq.Context, socket_type: zmq.SocketType, endpoint: str):
    """Create, configure, and wire up a ZMQ socket on ``endpoint``.

    PUB/SUB sockets get an unbounded high-water mark and, on hosts with
    plenty of RAM (>32 GiB total and >16 GiB available), a 0.5 GiB kernel
    send/receive buffer; otherwise the OS default buffer (-1) is used.
    PUB and REP sockets bind to the endpoint; SUB and REQ sockets connect.

    Raises:
        ValueError: if ``socket_type`` is not PUB, SUB, REP, or REQ.
    """
    gib = 1024 ** 3
    vm = psutil.virtual_memory()
    # Only claim a large kernel buffer when the host clearly has RAM to spare.
    plenty_of_ram = (vm.total / gib > 32) and (vm.available / gib > 16)
    buf_size = gib // 2 if plenty_of_ram else -1  # -1 -> system default

    socket = context.socket(socket_type)
    if socket_type == zmq.PUB:
        socket.setsockopt(zmq.SNDHWM, 0)  # unbounded outgoing message queue
        socket.setsockopt(zmq.SNDBUF, buf_size)
        socket.bind(endpoint)
    elif socket_type == zmq.SUB:
        socket.setsockopt(zmq.RCVHWM, 0)  # unbounded incoming message queue
        socket.setsockopt(zmq.RCVBUF, buf_size)
        socket.connect(endpoint)
    elif socket_type == zmq.REP:
        socket.bind(endpoint)
    elif socket_type == zmq.REQ:
        socket.connect(endpoint)
    else:
        raise ValueError(f"Unsupported socket type: {socket_type}")

    return socket

def subscriber_process(ident):
    """Run a SUB worker: handshake with the publisher, then print every
    message published on topic ``ident`` until a STOP payload arrives.

    Args:
        ident: small integer topic id (must fit in a single byte).
    """
    context = zmq.Context()
    # Synchronization socket to signal readiness
    sync_socket = get_zmq_socket(context, zmq.REQ, "ipc://sync.ipc")

    # Subscriber socket
    socket = get_zmq_socket(context, zmq.SUB, "ipc://pubsub.ipc")
    # Subscribe to messages with the given ident as bytes
    topic_filter = ident.to_bytes(1, byteorder='big')
    socket.setsockopt(zmq.SUBSCRIBE, topic_filter)

    # Handshake so the publisher knows our subscription is in place
    # before it starts sending (avoids the PUB/SUB slow-joiner drop).
    sync_socket.send(b'READY')
    sync_socket.recv()  # Wait for acknowledgment

    running = True
    while running:
        try:
            # Receive multipart message: [topic][payload]
            topic = socket.recv()
            payload = socket.recv_pyobj()
            topic_int = int.from_bytes(topic, 'big')
            print(f"Subscriber {ident} received on topic {topic_int}: {payload}")
            # Check for stop command
            if payload.get('command') == 'STOP':
                running = False
        except Exception as e:
            print(f"Subscriber {ident} exception: {e}")
            running = False
    # BUG FIX: close *both* sockets before terminating the context.
    # zmq.Context.term() blocks until every socket created from the context
    # is closed, so leaving sync_socket open would hang this process here.
    sync_socket.close()
    socket.close()
    context.term()

def spawn_one_subscriber(ident):
    """Fork a subscriber_process worker for topic ``ident`` and return the
    already-started multiprocessing.Process handle."""
    worker = multiprocessing.Process(target=subscriber_process, args=(ident,))
    worker.start()
    return worker

def main():
    """Publisher side of the demo: spawn two subscribers, synchronize with
    them, stream five messages to each topic, broadcast STOP, and shut down
    cleanly once both workers have exited.
    """
    context = zmq.Context()

    # Synchronization socket to receive readiness signals
    sync_socket = get_zmq_socket(context, zmq.REP, "ipc://sync.ipc")

    # Publisher socket
    socket = get_zmq_socket(context, zmq.PUB, "ipc://pubsub.ipc")

    # Spawn subscribers with integer identifiers 1 and 2
    topic_ids = (1, 2)
    workers = [spawn_one_subscriber(ident) for ident in topic_ids]

    # Wait for every subscriber to signal readiness before publishing
    # (avoids the PUB/SUB slow-joiner problem of silently dropped messages).
    for _ in workers:
        sync_socket.recv()
        print("Received subscriber ready signal")
        sync_socket.send(b'')  # Send acknowledgment

    # Allow some time for subscribers to process the sync messages
    time.sleep(1)

    # Build each single-byte topic frame once instead of on every iteration.
    topics = {ident: ident.to_bytes(1, byteorder='big') for ident in topic_ids}

    def _publish(ident, payload):
        # One two-frame message: [topic][pickled payload].
        print(f"Publishing to topic {ident}: {payload}")
        socket.send_multipart([topics[ident]], zmq.SNDMORE)
        socket.send_pyobj(payload)

    # Send five data messages to each subscriber, alternating topics.
    data = {1: b'\x00\x01\x02', 2: b'\x03\x04\x05'}
    for i in range(5):
        for ident in topic_ids:
            _publish(ident, {'message': f"Message {i} to subscriber {ident}",
                             'data': data[ident]})
        time.sleep(0.5)

    # Send stop commands to subscribers
    print("Sending stop commands")
    stop_msg = {'command': 'STOP'}
    for ident in topic_ids:
        socket.send_multipart([topics[ident]], zmq.SNDMORE)
        socket.send_pyobj(stop_msg)

    # Wait for subscribers to terminate before tearing down the context
    for worker in workers:
        worker.join()

    socket.close()
    sync_socket.close()
    context.term()

# Entry-point guard: run the demo only when executed as a script. This also
# keeps multiprocessing's "spawn" start method (which re-imports this module
# in child processes) from re-running main() in every worker.
if __name__ == "__main__":
    main()

0 comments on commit bcb4ef2

Please sign in to comment.