Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Fix link error and auto build library. #6

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 150 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# CI workflow: builds the pllm shared library for every supported CPython
# version against CUDA 12.1 and CUDA 12.4, using the PyTorch manylinux
# builder containers. Runs on version tags (v*) or manual dispatch, and
# attaches the resulting tarballs to the GitHub release on tag builds.
name: Build

# `contents: write` is required so softprops/action-gh-release can upload
# release assets with the default GITHUB_TOKEN.
permissions:
  contents: write

on:
  push:
    tags:
      - "v*"  # quoted: a bare leading * would be a YAML alias sigil in other positions; quoting is the safe convention for globs
  workflow_dispatch:

jobs:
  build_cu121:
    name: Build Shared Library (CUDA 12.1)
    runs-on: ubuntu-latest
    container:
      image: pytorch/manylinux2_28-builder:cuda12.1

    # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Environments
        run: |
          # Map the matrix version to the manylinux CPython ABI tag
          # (e.g. "3.10" -> cp310-cp310) used under /opt/python.
          echo "PYTHON_VERSION=$(python3 -c "from os import environ as env; print({'3.7': 'cp37-cp37m', '3.8': 'cp38-cp38', '3.9': 'cp39-cp39', '3.10': 'cp310-cp310', '3.11': 'cp311-cp311', '3.12': 'cp312-cp312' }['${{ matrix.python-version }}'])")" >> $GITHUB_ENV
          # Capture the container PATH so later steps can extend it via env.PATH.
          echo "PATH=$PATH" >> $GITHUB_ENV

      - name: Set up Packages
        env:
          PYBIN: /opt/python/${{ env.PYTHON_VERSION }}/bin
        run: |
          # FIX: `dnf upgrade` must also pass -y — without it dnf prompts for
          # confirmation, gets "no" from the non-interactive CI stdin, aborts
          # the transaction, and fails the step.
          dnf -y install epel-release && dnf -y upgrade
          dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && dnf makecache
          dnf module install -y nvidia-driver:latest && dnf clean all
          yum install -y pigz clang-tools-extra openmpi openmpi-devel spdlog spdlog-devel libibverbs rdma-core-devel numactl numactl-devel numactl-libs
          pwd && ls -la
          # NOTE(review): deleting a hard-coded line number is brittle — this
          # silently patches the wrong line if the mscclpp submodule is bumped.
          # Consider a proper patch file under 3rdparty/patches instead.
          sed -i '256d' 3rdparty/mscclpp/src/bootstrap/bootstrap.cc
          cat 3rdparty/patches/spdlog/*.patch | patch -p1 -d 3rdparty/spdlog
          $PYBIN/pip install cmake mypy pybind11 black
          cd pipeline/src/generate-gemm && $PYBIN/python genGEMM.py && cd ../../

      - name: Build Library
        env:
          PATH: ${{ env.PATH }}:/usr/lib64/openmpi/bin
          PYBIN: /opt/python/${{ env.PYTHON_VERSION }}/bin
          # Semicolon-separated CMAKE_PREFIX_PATH: Python prefix first so the
          # matrix interpreter wins, then the OpenMPI install.
          PREFIX: /opt/python/${{ env.PYTHON_VERSION }};/usr/lib64/openmpi
          pybind11_DIR: /opt/python/${{ env.PYTHON_VERSION }}/lib/python${{ matrix.python-version }}/site-packages/pybind11
          CUDACXX: "/usr/local/cuda-12.1/bin/nvcc"
          CUDA_HOME: "/usr/local/cuda-12.1"
          CFLAGS: "-I/usr/include"
          LDFLAGS: "-L/usr/lib"
        run: |
          cd pipeline && mkdir -p build && cd build
          which cmake && cmake --version
          which mpicxx && mpicxx --version
          ${PYBIN}/cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${PREFIX} -DBUILD_PYTHON_BINDINGS=OFF -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON ..
          make -j$(nproc)
          # NOTE(review): pllm_python.* is archived although the configure line
          # passes -DBUILD_PYTHON_BINDINGS=OFF — confirm the bindings target is
          # actually produced, otherwise tar exits non-zero on the missing glob.
          tar -zcvf pllm_cu121_py${{ matrix.python-version }}.tgz pllm_python.* test_*
          ls -la

      - uses: actions/upload-artifact@v4
        with:
          name: pllm_cu121_py${{ matrix.python-version }}.tgz
          path: |
            pipeline/build/pllm_cu121_py${{ matrix.python-version }}.tgz

      - name: Upload Release Asset
        uses: softprops/action-gh-release@v2
        if: startsWith(github.ref, 'refs/tags/')
        with:
          files: |
            pipeline/build/pllm_cu121_py${{ matrix.python-version }}.tgz

  # Identical pipeline against the CUDA 12.4 builder image; kept as a separate
  # job (rather than a CUDA matrix axis) so each variant gets its own
  # container image tag and artifact prefix.
  build_cu124:
    name: Build Shared Library (CUDA 12.4)
    runs-on: ubuntu-latest
    container:
      image: pytorch/manylinux2_28-builder:cuda12.4

    # see https://github.com/pytorch/pytorch/blob/main/RELEASE.md#release-compatibility-matrix
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          submodules: recursive

      - name: Set up Environments
        run: |
          # Map the matrix version to the manylinux CPython ABI tag
          # (e.g. "3.10" -> cp310-cp310) used under /opt/python.
          echo "PYTHON_VERSION=$(python3 -c "from os import environ as env; print({'3.7': 'cp37-cp37m', '3.8': 'cp38-cp38', '3.9': 'cp39-cp39', '3.10': 'cp310-cp310', '3.11': 'cp311-cp311', '3.12': 'cp312-cp312' }['${{ matrix.python-version }}'])")" >> $GITHUB_ENV
          # Capture the container PATH so later steps can extend it via env.PATH.
          echo "PATH=$PATH" >> $GITHUB_ENV

      - name: Set up Packages
        env:
          PYBIN: /opt/python/${{ env.PYTHON_VERSION }}/bin
        run: |
          # FIX: `dnf upgrade` must also pass -y — without it dnf prompts for
          # confirmation, gets "no" from the non-interactive CI stdin, aborts
          # the transaction, and fails the step.
          dnf -y install epel-release && dnf -y upgrade
          dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo && dnf makecache
          dnf module install -y nvidia-driver:latest && dnf clean all
          yum install -y pigz clang-tools-extra openmpi openmpi-devel spdlog spdlog-devel libibverbs rdma-core-devel numactl numactl-devel numactl-libs
          pwd && ls -la
          # NOTE(review): deleting a hard-coded line number is brittle — this
          # silently patches the wrong line if the mscclpp submodule is bumped.
          # Consider a proper patch file under 3rdparty/patches instead.
          sed -i '256d' 3rdparty/mscclpp/src/bootstrap/bootstrap.cc
          cat 3rdparty/patches/spdlog/*.patch | patch -p1 -d 3rdparty/spdlog
          $PYBIN/pip install cmake mypy pybind11 black
          cd pipeline/src/generate-gemm && $PYBIN/python genGEMM.py && cd ../../

      - name: Build Library
        env:
          PATH: ${{ env.PATH }}:/usr/lib64/openmpi/bin
          PYBIN: /opt/python/${{ env.PYTHON_VERSION }}/bin
          # Semicolon-separated CMAKE_PREFIX_PATH: Python prefix first so the
          # matrix interpreter wins, then the OpenMPI install.
          PREFIX: /opt/python/${{ env.PYTHON_VERSION }};/usr/lib64/openmpi
          pybind11_DIR: /opt/python/${{ env.PYTHON_VERSION }}/lib/python${{ matrix.python-version }}/site-packages/pybind11
          CUDACXX: "/usr/local/cuda-12.4/bin/nvcc"
          CUDA_HOME: "/usr/local/cuda-12.4"
          CFLAGS: "-I/usr/include"
          LDFLAGS: "-L/usr/lib"
        run: |
          cd pipeline && mkdir -p build && cd build
          which cmake && cmake --version
          which mpicxx && mpicxx --version
          ${PYBIN}/cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_PREFIX_PATH=${PREFIX} -DBUILD_PYTHON_BINDINGS=OFF -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON ..
          make -j$(nproc)
          # NOTE(review): pllm_python.* is archived although the configure line
          # passes -DBUILD_PYTHON_BINDINGS=OFF — confirm the bindings target is
          # actually produced, otherwise tar exits non-zero on the missing glob.
          tar -zcvf pllm_cu124_py${{ matrix.python-version }}.tgz pllm_python.* test_*
          ls -la

      - uses: actions/upload-artifact@v4
        with:
          name: pllm_cu124_py${{ matrix.python-version }}.tgz
          path: |
            pipeline/build/pllm_cu124_py${{ matrix.python-version }}.tgz

      - name: Upload Release Asset
        uses: softprops/action-gh-release@v2
        if: startsWith(github.ref, 'refs/tags/')
        with:
          files: |
            pipeline/build/pllm_cu124_py${{ matrix.python-version }}.tgz

1 change: 0 additions & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
[submodule "3rdparty/flashinfer"]
path = 3rdparty/flashinfer
url = https://github.com/happierpig/flashinfer-ai.git

[submodule "3rdparty/nvbench"]
path = 3rdparty/nvbench
url = https://github.com/NVIDIA/nvbench.git
Expand Down
2 changes: 1 addition & 1 deletion 3rdparty/mscclpp
Submodule mscclpp updated 100 files
14 changes: 6 additions & 8 deletions pipeline/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ set(NVBENCH_DIR ${PARENT_DIR}/3rdparty/nvbench CACHE PATH "Path to nvbench src")
set(GTEST_DIR ${PARENT_DIR}/3rdparty/googletest CACHE PATH "Path to googletest src")
set(CUTLASS_DIR ${PARENT_DIR}/3rdparty/cutlass CACHE PATH "Path to modified cutlass src")
set(SPDLOG_DIR ${PARENT_DIR}/3rdparty/spdlog CACHE PATH "Path to spdlog src")
set(MSCCLPP_DIR /usr/local/mscclpp CACHE PATH "Path to mscclpp install")
set(MSCCLPP_DIR ${PARENT_DIR}/3rdparty/mscclpp CACHE PATH "Path to mscclpp install")
set(FLASHINFER_DIR ${PARENT_DIR}/3rdparty/flashinfer CACHE PATH "Path to flashinfer src")
# override by SMALL_FLASHINFER
#set(FLASHINFER_DIR ${PARENT_DIR}/3rdparty/flashinfer CACHE PATH "Path to flashinfer src")
Expand All @@ -36,6 +36,7 @@ if (NOT MPI_FOUND)
endif()

add_subdirectory(${SPDLOG_DIR} spdlog)
add_subdirectory(${MSCCLPP_DIR} mscclpp)

find_package(pybind11 REQUIRED)
if (NOT pybind11_FOUND)
Expand All @@ -45,21 +46,18 @@ endif()
find_program(STUBGEN_EXECUTABLE NAMES stubgen)

add_compile_options(-w)
include_directories(${MSCCLPP_DIR}/include)

include_directories(${CUTLASS_DIR}/include)
include_directories(${CUTLASS_DIR}/tools/util/include)
# override by SMALL_FLASHINFER
#include_directories(${FLASHINFER_DIR}/include)
# ------------- Build Network Test -----------------#

find_library(MSCCLPP_LIBRARY NAMES mscclpp PATHS ${MSCCLPP_DIR}/lib NO_DEFAULT_PATH)

add_executable(test_comm "${CMAKE_SOURCE_DIR}/src/comm_test.cu" "${CMAKE_SOURCE_DIR}/src/comm.cu")
target_include_directories(test_comm PRIVATE ${CMAKE_SOURCE_DIR}/include)
target_include_directories(test_comm PRIVATE ${PARENT_DIR}/3rdparty/cutlass/include)
target_link_libraries(test_comm PRIVATE MPI::MPI_CXX)
target_link_libraries(test_comm PRIVATE ${MSCCLPP_LIBRARY})
target_link_libraries(test_comm PRIVATE mscclpp_static)
target_link_libraries(test_comm PRIVATE spdlog::spdlog)
target_compile_definitions(test_comm PRIVATE -DENABLE_MPI)

Expand Down Expand Up @@ -193,7 +191,7 @@ target_include_directories(shared_lib PRIVATE ${PARENT_DIR}/3rdparty/cutlass/inc
target_include_directories(shared_lib PRIVATE ${PARENT_DIR}/3rdparty/cutlass/tools/util/include)
target_include_directories(shared_lib PRIVATE ${SMALL_FLASHINFER_DIR}/include ${FLASHINFER_DIR}/include)
target_link_libraries(shared_lib PRIVATE decode_kernels prefill_kernels)
target_link_libraries(shared_lib PRIVATE ${MSCCLPP_LIBRARY})
target_link_libraries(shared_lib PRIVATE mscclpp_static)
target_link_libraries(shared_lib PRIVATE MPI::MPI_CXX)
target_link_libraries(shared_lib PRIVATE spdlog::spdlog)
target_link_libraries(shared_lib PRIVATE gemm_lib)
Expand All @@ -210,7 +208,7 @@ target_include_directories(test_compute PRIVATE ${CMAKE_SOURCE_DIR}/include)
target_include_directories(test_compute PRIVATE ${SMALL_FLASHINFER_DIR}/include ${FLASHINFER_DIR}/include)
target_link_libraries(test_compute PRIVATE decode_kernels prefill_kernels)
# Include Network Libraries
target_link_libraries(test_compute PRIVATE ${MSCCLPP_LIBRARY})
target_link_libraries(test_compute PRIVATE mscclpp_static)
target_link_libraries(test_compute PRIVATE MPI::MPI_CXX)
# Include Log Libraries
target_link_libraries(test_compute PRIVATE spdlog::spdlog)
Expand All @@ -229,7 +227,7 @@ target_include_directories(pllm_python PRIVATE ${CMAKE_SOURCE_DIR}/include)
target_include_directories(pllm_python PRIVATE ${SMALL_FLASHINFER_DIR}/include ${FLASHINFER_DIR}/include)
target_link_libraries(pllm_python PRIVATE decode_kernels prefill_kernels)
# Include Network Libraries
target_link_libraries(pllm_python PRIVATE ${MSCCLPP_LIBRARY})
target_link_libraries(pllm_python PRIVATE mscclpp_static)
target_link_libraries(pllm_python PRIVATE MPI::MPI_CXX)
# Include Log Libraries
target_link_libraries(pllm_python PRIVATE spdlog::spdlog)
Expand Down
12 changes: 6 additions & 6 deletions pipeline/utils/kv_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from pybindUtil import toGPU, toGPUTensor
from torch.profiler import profile, record_function, ProfilerActivity
import time

from typing import List


class DistKVPool:
Expand Down Expand Up @@ -73,15 +73,15 @@ class DistKVCache:

def __init__(self, pool: DistKVPool):
self._pool = pool
self._indices : list[int] = []
self._indices : List[int] = []
self._seqlen : int = 0

@property
def seqlen(self) -> int:
return self._seqlen

@property
def indicies(self) -> list[int]:
def indicies(self) -> List[int]:
return self._indices

@property
Expand Down Expand Up @@ -116,11 +116,11 @@ def __init__(self, decode_kvs: Sequence[DistKVCache], prefill_kvs: Sequence[Dist
"""
# batch_size = len(decode_kvs) + len(prefill_kvs)
# [batch_size + 1,]
self._kv_indptr : list[int] = [0]
self._kv_indptr : List[int] = [0]
# [num_pages_in_total, ]
self._kv_indices : list[int] = []
self._kv_indices : List[int] = []
# [batch_size, ]
self._kv_last_page_len : list[int] = []
self._kv_last_page_len : List[int] = []

# Here we do not materialize data into specific devices,
# for distributed assignment.
Expand Down
7 changes: 4 additions & 3 deletions pipeline/utils/pybindUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import numpy as np
import pllm_python
from torch.profiler import profile, record_function, ProfilerActivity
from typing import List

def toGPU(data, nranks, dtype, non_blocking=True):

Expand Down Expand Up @@ -88,8 +89,8 @@ def initUpdateData(
kv_last_page_len: int,
rev_input_indptr: int,
per_token_offset: int,
gemv_batch_size: list[int],
gemv_block_num: list[int] ) -> list[pllm_python.VortexUpdateData]:
gemv_batch_size: List[int],
gemv_block_num: List[int] ) -> List[pllm_python.VortexUpdateData]:
updateDataList = []
gemv_batch_size = np.array(gemv_batch_size, dtype=np.int32)
gemv_block_num = np.array(gemv_block_num, dtype=np.int32)
Expand Down Expand Up @@ -118,4 +119,4 @@ def initUpdateData(
print("Time taken: ", time.time() - t)




6 changes: 4 additions & 2 deletions pipeline/utils/request_info.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from collections import deque
from typing import List

class NewRequestInfo:
"""
Request info for incoming request
NOTE (Yilong): add support for offloading / onloading KV-Cache
"""
req_idx: int
prompt: list[int]
prompt: List[int]
output_len : int
start_time: float

Expand Down Expand Up @@ -36,7 +38,7 @@ class FlyRequestInfo:
NOTE (Yilong): add support for offloading / onloading KV-Cache
"""

def __init__(self, req_idx: int, input: list[int], output: list[int], prompt: list[int], request_comein_time: float,
def __init__(self, req_idx: int, input: List[int], output: List[int], prompt: List[int], request_comein_time: float,
chunked_prefill: bool, kv_cache, encode_latency: float,
decode_start_at: float, decode_latency: float, output_len: int, input_len: int):
self.req_idx = req_idx
Expand Down
15 changes: 8 additions & 7 deletions pipeline/utils/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import pickle
from collections import deque
from typing import List

from kv_cache import DistKVPool, DistKVCache, BatchedDistKVCache
from request_info import NewRequestInfo, NewRequestQueue, FlyRequestInfo
Expand All @@ -38,7 +39,7 @@ class WorkingSet:
"""

def __init__(self) -> None:
self._set : list[FlyRequestInfo] = []
self._set : List[FlyRequestInfo] = []

def put(self, req: FlyRequestInfo):
self._set.append(req)
Expand Down Expand Up @@ -209,12 +210,12 @@ def bench_text_gen(self, retired_rq, actualRun = True):
self._prefill_workset.adjust_kv_cache()
self._decode_workset.adjust_kv_cache()

input_ids : list[int] = []
input_indptr : list[int] = [0]
prev_len : list[int] = []
input_ids : List[int] = []
input_indptr : List[int] = [0]
prev_len : List[int] = []
decodePrefillBorder = self._decode_workset.effective_bsz
decode_kvs : list[DistKVCache] = []
prefill_kvs : list[DistKVCache] = []
decode_kvs : List[DistKVCache] = []
prefill_kvs : List[DistKVCache] = []

with record_function("calc batch size"):
t3 = time.perf_counter()
Expand Down Expand Up @@ -409,7 +410,7 @@ def print_and_to_file(string, f):
scheduler = Scheduler(pool, request_manager.avaliable_request_queue)
scheduler.init_pipe(args.config_path)

retired_rq : list[FlyRequestInfo] = []
retired_rq : List[FlyRequestInfo] = []
totalCycle = 0

skip_cycle = args.skip_cycles
Expand Down
Loading