Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix link error and auto build library. #6

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# CI workflow: builds the pllm shared library for every supported CPython
# version against CUDA 12.1 and CUDA 12.4, using the PyTorch manylinux
# builder containers. Runs on version tags (v*) or manual dispatch, and
# attaches the resulting tarballs to the GitHub release on tag builds.
name: Build

# `contents: write` is required so softprops/action-gh-release can upload
# release assets with the default GITHUB_TOKEN.
permissions:
  contents: write

on:
  push:
    tags:
      - "v*"  # quoted: a bare leading * would be a YAML alias sigil in other positions; quoting is the safe convention for globs
  workflow_dispatch:

jobs:
  build_cu121:
    name: Build Shared Library (CUDA 12.1)
    runs-on: ubuntu-latest
    container:
      image: pytorch/manylinux2_28-builder:cuda12.1

    # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Environments
        run: |
          # Map the matrix version to the manylinux CPython ABI tag
          # (e.g. "3.10" -> cp310-cp310) used under /opt/python.
          echo "PYTHON_VERSION=$(python3 -c "from os import environ as env; print({'3.7': 'cp37-cp37m', '3.8': 'cp38-cp38', '3.9': 'cp39-cp39', '3.10': 'cp310-cp310', '3.11': 'cp311-cp311', '3.12': 'cp312-cp312' }['${{ matrix.python-version }}'])")" >> $GITHUB_ENV
          # Capture the container PATH so later steps can extend it via env.PATH.
          echo "PATH=$PATH" >> $GITHUB_ENV

      - name: Set up Packages
        env:
          PYBIN: /opt/python/${{ env.PYTHON_VERSION }}/bin
        run: |
          # FIX: `dnf upgrade` must also pass -y — without it dnf prompts for
          # confirmation, gets "no" from the non-interactive CI stdin, aborts
          # the transaction, and fails the step.
          dnf -y install epel-release && dnf -y upgrade
          dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && dnf makecache
          dnf module install -y nvidia-driver:latest && dnf clean all
          yum install -y pigz clang-tools-extra openmpi openmpi-devel spdlog spdlog-devel libibverbs rdma-core-devel numactl numactl-devel numactl-libs
          pwd && ls -la
          # NOTE(review): deleting a hard-coded line number is brittle — this
          # silently patches the wrong line if the mscclpp submodule is bumped.
          # Consider a proper patch file under 3rdparty/patches instead.
          sed -i '256d' 3rdparty/mscclpp/src/bootstrap/bootstrap.cc
          cat 3rdparty/patches/spdlog/*.patch | patch -p1 -d 3rdparty/spdlog
          $PYBIN/pip install cmake mypy pybind11 black
          cd pipeline/src/generate-gemm && $PYBIN/python genGEMM.py && cd ../../

      - name: Build Library
        env:
          PATH: ${{ env.PATH }}:/usr/lib64/openmpi/bin
          PYBIN: /opt/python/${{ env.PYTHON_VERSION }}/bin
          # Semicolon-separated CMAKE_PREFIX_PATH: Python prefix first so the
          # matrix interpreter wins, then the OpenMPI install.
          PREFIX: /opt/python/${{ env.PYTHON_VERSION }};/usr/lib64/openmpi
          pybind11_DIR: /opt/python/${{ env.PYTHON_VERSION }}/lib/python${{ matrix.python-version }}/site-packages/pybind11
          CUDACXX: "/usr/local/cuda-12.1/bin/nvcc"
          CUDA_HOME: "/usr/local/cuda-12.1"
          CFLAGS: "-I/usr/include"
          LDFLAGS: "-L/usr/lib"
        run: |
          cd pipeline && mkdir -p build && cd build
          which cmake && cmake --version
          which mpicxx && mpicxx --version
          ${PYBIN}/cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${PREFIX} -DBUILD_PYTHON_BINDINGS=OFF -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON ..
          make -j$(nproc)
          # NOTE(review): pllm_python.* is archived although the configure line
          # passes -DBUILD_PYTHON_BINDINGS=OFF — confirm the bindings target is
          # actually produced, otherwise tar exits non-zero on the missing glob.
          tar -zcvf pllm_cu121_py${{ matrix.python-version }}.tgz pllm_python.* test_*
          ls -la

      - uses: actions/upload-artifact@v4
        with:
          name: pllm_cu121_py${{ matrix.python-version }}.tgz
          path: |
            pipeline/build/pllm_cu121_py${{ matrix.python-version }}.tgz

      - name: Upload Release Asset
        uses: softprops/action-gh-release@v2
        if: startsWith(github.ref, 'refs/tags/')
        with:
          files: |
            pipeline/build/pllm_cu121_py${{ matrix.python-version }}.tgz

  # Identical pipeline against the CUDA 12.4 builder image; kept as a separate
  # job (rather than a CUDA matrix axis) so each variant gets its own
  # container image tag and artifact prefix.
  build_cu124:
    name: Build Shared Library (CUDA 12.4)
    runs-on: ubuntu-latest
    container:
      image: pytorch/manylinux2_28-builder:cuda12.4

    # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Environments
        run: |
          # Map the matrix version to the manylinux CPython ABI tag
          # (e.g. "3.10" -> cp310-cp310) used under /opt/python.
          echo "PYTHON_VERSION=$(python3 -c "from os import environ as env; print({'3.7': 'cp37-cp37m', '3.8': 'cp38-cp38', '3.9': 'cp39-cp39', '3.10': 'cp310-cp310', '3.11': 'cp311-cp311', '3.12': 'cp312-cp312' }['${{ matrix.python-version }}'])")" >> $GITHUB_ENV
          # Capture the container PATH so later steps can extend it via env.PATH.
          echo "PATH=$PATH" >> $GITHUB_ENV

      - name: Set up Packages
        env:
          PYBIN: /opt/python/${{ env.PYTHON_VERSION }}/bin
        run: |
          # FIX: `dnf upgrade` must also pass -y — without it dnf prompts for
          # confirmation, gets "no" from the non-interactive CI stdin, aborts
          # the transaction, and fails the step.
          dnf -y install epel-release && dnf -y upgrade
          dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && dnf makecache
          dnf module install -y nvidia-driver:latest && dnf clean all
          yum install -y pigz clang-tools-extra openmpi openmpi-devel spdlog spdlog-devel libibverbs rdma-core-devel numactl numactl-devel numactl-libs
          pwd && ls -la
          # NOTE(review): deleting a hard-coded line number is brittle — this
          # silently patches the wrong line if the mscclpp submodule is bumped.
          # Consider a proper patch file under 3rdparty/patches instead.
          sed -i '256d' 3rdparty/mscclpp/src/bootstrap/bootstrap.cc
          cat 3rdparty/patches/spdlog/*.patch | patch -p1 -d 3rdparty/spdlog
          $PYBIN/pip install cmake mypy pybind11 black
          cd pipeline/src/generate-gemm && $PYBIN/python genGEMM.py && cd ../../

      - name: Build Library
        env:
          PATH: ${{ env.PATH }}:/usr/lib64/openmpi/bin
          PYBIN: /opt/python/${{ env.PYTHON_VERSION }}/bin
          # Semicolon-separated CMAKE_PREFIX_PATH: Python prefix first so the
          # matrix interpreter wins, then the OpenMPI install.
          PREFIX: /opt/python/${{ env.PYTHON_VERSION }};/usr/lib64/openmpi
          pybind11_DIR: /opt/python/${{ env.PYTHON_VERSION }}/lib/python${{ matrix.python-version }}/site-packages/pybind11
          CUDACXX: "/usr/local/cuda-12.4/bin/nvcc"
          CUDA_HOME: "/usr/local/cuda-12.4"
          CFLAGS: "-I/usr/include"
          LDFLAGS: "-L/usr/lib"
        run: |
          cd pipeline && mkdir -p build && cd build
          which cmake && cmake --version
          which mpicxx && mpicxx --version
          ${PYBIN}/cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${PREFIX} -DBUILD_PYTHON_BINDINGS=OFF -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON ..
          make -j$(nproc)
          # NOTE(review): pllm_python.* is archived although the configure line
          # passes -DBUILD_PYTHON_BINDINGS=OFF — confirm the bindings target is
          # actually produced, otherwise tar exits non-zero on the missing glob.
          tar -zcvf pllm_cu124_py${{ matrix.python-version }}.tgz pllm_python.* test_*
          ls -la

      - uses: actions/upload-artifact@v4
        with:
          name: pllm_cu124_py${{ matrix.python-version }}.tgz
          path: |
            pipeline/build/pllm_cu124_py${{ matrix.python-version }}.tgz

      - name: Upload Release Asset
        uses: softprops/action-gh-release@v2
        if: startsWith(github.ref, 'refs/tags/')
        with:
          files: |
            pipeline/build/pllm_cu124_py${{ matrix.python-version }}.tgz

1 change: 0 additions & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
[submodule "3rdparty/flashinfer"]
path = 3rdparty/flashinfer
url = https://github.com/happierpig/flashinfer-ai.git

[submodule "3rdparty/nvbench"]
path = 3rdparty/nvbench
url = https://github.com/NVIDIA/nvbench.git
Expand Down
2 changes: 1 addition & 1 deletion 3rdparty/mscclpp
Submodule mscclpp updated 100 files
14 changes: 6 additions & 8 deletions pipeline/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ set(NVBENCH_DIR ${PARENT_DIR}/3rdparty/nvbench CACHE PATH "Path to nvbench src")
set(GTEST_DIR ${PARENT_DIR}/3rdparty/googletest CACHE PATH "Path to googletest src")
set(CUTLASS_DIR ${PARENT_DIR}/3rdparty/cutlass CACHE PATH "Path to modified cutlass src")
set(SPDLOG_DIR ${PARENT_DIR}/3rdparty/spdlog CACHE PATH "Path to spdlog src")
set(MSCCLPP_DIR /usr/local/mscclpp CACHE PATH "Path to mscclpp install")
set(MSCCLPP_DIR ${PARENT_DIR}/3rdparty/mscclpp CACHE PATH "Path to mscclpp install")
set(FLASHINFER_DIR ${PARENT_DIR}/3rdparty/flashinfer CACHE PATH "Path to flashinfer src")
# override by SMALL_FLASHINFER
#set(FLASHINFER_DIR ${PARENT_DIR}/3rdparty/flashinfer CACHE PATH "Path to flashinfer src")
Expand All @@ -36,6 +36,7 @@ if (NOT MPI_FOUND)
endif()

add_subdirectory(${SPDLOG_DIR} spdlog)
add_subdirectory(${MSCCLPP_DIR} mscclpp)

find_package(pybind11 REQUIRED)
if (NOT pybind11_FOUND)
Expand All @@ -45,21 +46,18 @@ endif()
find_program(STUBGEN_EXECUTABLE NAMES stubgen)

add_compile_options(-w)
include_directories(${MSCCLPP_DIR}/include)

include_directories(${CUTLASS_DIR}/include)
include_directories(${CUTLASS_DIR}/tools/util/include)
# override by SMALL_FLASHINFER
#include_directories(${FLASHINFER_DIR}/include)
# ------------- Build Network Test -----------------#

find_library(MSCCLPP_LIBRARY NAMES mscclpp PATHS ${MSCCLPP_DIR}/lib NO_DEFAULT_PATH)

add_executable(test_comm "${CMAKE_SOURCE_DIR}/src/comm_test.cu" "${CMAKE_SOURCE_DIR}/src/comm.cu")
target_include_directories(test_comm PRIVATE ${CMAKE_SOURCE_DIR}/include)
target_include_directories(test_comm PRIVATE ${PARENT_DIR}/3rdparty/cutlass/include)
target_link_libraries(test_comm PRIVATE MPI::MPI_CXX)
target_link_libraries(test_comm PRIVATE ${MSCCLPP_LIBRARY})
target_link_libraries(test_comm PRIVATE mscclpp_static)
target_link_libraries(test_comm PRIVATE spdlog::spdlog)
target_compile_definitions(test_comm PRIVATE -DENABLE_MPI)

Expand Down Expand Up @@ -193,7 +191,7 @@ target_include_directories(shared_lib PRIVATE ${PARENT_DIR}/3rdparty/cutlass/inc
target_include_directories(shared_lib PRIVATE ${PARENT_DIR}/3rdparty/cutlass/tools/util/include)
target_include_directories(shared_lib PRIVATE ${SMALL_FLASHINFER_DIR}/include ${FLASHINFER_DIR}/include)
target_link_libraries(shared_lib PRIVATE decode_kernels prefill_kernels)
target_link_libraries(shared_lib PRIVATE ${MSCCLPP_LIBRARY})
target_link_libraries(shared_lib PRIVATE mscclpp_static)
target_link_libraries(shared_lib PRIVATE MPI::MPI_CXX)
target_link_libraries(shared_lib PRIVATE spdlog::spdlog)
target_link_libraries(shared_lib PRIVATE gemm_lib)
Expand All @@ -210,7 +208,7 @@ target_include_directories(test_compute PRIVATE ${CMAKE_SOURCE_DIR}/include)
target_include_directories(test_compute PRIVATE ${SMALL_FLASHINFER_DIR}/include ${FLASHINFER_DIR}/include)
target_link_libraries(test_compute PRIVATE decode_kernels prefill_kernels)
# Include Network Libraries
target_link_libraries(test_compute PRIVATE ${MSCCLPP_LIBRARY})
target_link_libraries(test_compute PRIVATE mscclpp_static)
target_link_libraries(test_compute PRIVATE MPI::MPI_CXX)
# Include Log Libraries
target_link_libraries(test_compute PRIVATE spdlog::spdlog)
Expand All @@ -229,7 +227,7 @@ target_include_directories(pllm_python PRIVATE ${CMAKE_SOURCE_DIR}/include)
target_include_directories(pllm_python PRIVATE ${SMALL_FLASHINFER_DIR}/include ${FLASHINFER_DIR}/include)
target_link_libraries(pllm_python PRIVATE decode_kernels prefill_kernels)
# Include Network Libraries
target_link_libraries(pllm_python PRIVATE ${MSCCLPP_LIBRARY})
target_link_libraries(pllm_python PRIVATE mscclpp_static)
target_link_libraries(pllm_python PRIVATE MPI::MPI_CXX)
# Include Log Libraries
target_link_libraries(pllm_python PRIVATE spdlog::spdlog)
Expand Down
12 changes: 6 additions & 6 deletions pipeline/utils/kv_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from pybindUtil import toGPU, toGPUTensor
from torch.profiler import profile, record_function, ProfilerActivity
import time

from typing import List


class DistKVPool:
Expand Down Expand Up @@ -73,15 +73,15 @@ class DistKVCache:

def __init__(self, pool: DistKVPool):
self._pool = pool
self._indices : list[int] = []
self._indices : List[int] = []
self._seqlen : int = 0

@property
def seqlen(self) -> int:
return self._seqlen

@property
def indicies(self) -> list[int]:
def indicies(self) -> List[int]:
return self._indices

@property
Expand Down Expand Up @@ -116,11 +116,11 @@ def __init__(self, decode_kvs: Sequence[DistKVCache], prefill_kvs: Sequence[Dist
"""
# batch_size = len(decode_kvs) + len(prefill_kvs)
# [batch_size + 1,]
self._kv_indptr : list[int] = [0]
self._kv_indptr : List[int] = [0]
# [num_pages_in_total, ]
self._kv_indices : list[int] = []
self._kv_indices : List[int] = []
# [batch_size, ]
self._kv_last_page_len : list[int] = []
self._kv_last_page_len : List[int] = []

# Here we do not materialize data into specific devices,
# for distributed assignment.
Expand Down
7 changes: 4 additions & 3 deletions pipeline/utils/pybindUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import pllm_python
from torch.profiler import profile, record_function, ProfilerActivity
from typing import List

def toGPU(data, nranks, dtype, non_blocking=True):

Expand Down Expand Up @@ -88,8 +89,8 @@ def initUpdateData(
kv_last_page_len: int,
rev_input_indptr: int,
per_token_offset: int,
gemv_batch_size: list[int],
gemv_block_num: list[int] ) -> list[pllm_python.VortexUpdateData]:
gemv_batch_size: List[int],
gemv_block_num: List[int] ) -> List[pllm_python.VortexUpdateData]:
updateDataList = []
gemv_batch_size = np.array(gemv_batch_size, dtype=np.int32)
gemv_block_num = np.array(gemv_block_num, dtype=np.int32)
Expand Down Expand Up @@ -118,4 +119,4 @@ def initUpdateData(
print("Time taken: ", time.time() - t)




6 changes: 4 additions & 2 deletions pipeline/utils/request_info.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from collections import deque
from typing import List

class NewRequestInfo:
"""
Request info for incoming request
NOTE (Yilong): add support for offloading / onloading KV-Cache
"""
req_idx: int
prompt: list[int]
prompt: List[int]
output_len : int
start_time: float

Expand Down Expand Up @@ -36,7 +38,7 @@ class FlyRequestInfo:
NOTE (Yilong): add support for offloading / onloading KV-Cache
"""

def __init__(self, req_idx: int, input: list[int], output: list[int], prompt: list[int], request_comein_time: float,
def __init__(self, req_idx: int, input: List[int], output: List[int], prompt: List[int], request_comein_time: float,
chunked_prefill: bool, kv_cache, encode_latency: float,
decode_start_at: float, decode_latency: float, output_len: int, input_len: int):
self.req_idx = req_idx
Expand Down
15 changes: 8 additions & 7 deletions pipeline/utils/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import pickle
from collections import deque
from typing import List

from kv_cache import DistKVPool, DistKVCache, BatchedDistKVCache
from request_info import NewRequestInfo, NewRequestQueue, FlyRequestInfo
Expand All @@ -38,7 +39,7 @@ class WorkingSet:
"""

def __init__(self) -> None:
self._set : list[FlyRequestInfo] = []
self._set : List[FlyRequestInfo] = []

def put(self, req: FlyRequestInfo):
self._set.append(req)
Expand Down Expand Up @@ -209,12 +210,12 @@ def bench_text_gen(self, retired_rq, actualRun = True):
self._prefill_workset.adjust_kv_cache()
self._decode_workset.adjust_kv_cache()

input_ids : list[int] = []
input_indptr : list[int] = [0]
prev_len : list[int] = []
input_ids : List[int] = []
input_indptr : List[int] = [0]
prev_len : List[int] = []
decodePrefillBorder = self._decode_workset.effective_bsz
decode_kvs : list[DistKVCache] = []
prefill_kvs : list[DistKVCache] = []
decode_kvs : List[DistKVCache] = []
prefill_kvs : List[DistKVCache] = []

with record_function("calc batch size"):
t3 = time.perf_counter()
Expand Down Expand Up @@ -409,7 +410,7 @@ def print_and_to_file(string, f):
scheduler = Scheduler(pool, request_manager.avaliable_request_queue)
scheduler.init_pipe(args.config_path)

retired_rq : list[FlyRequestInfo] = []
retired_rq : List[FlyRequestInfo] = []
totalCycle = 0

skip_cycle = args.skip_cycles
Expand Down
Loading