forked from ggerganov/llama.cpp
Commit: Merge branch 'master' into layla-build
Showing 137 changed files with 27,035 additions and 11,292 deletions.
@@ -0,0 +1,44 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the ROCm dev container image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set the GPU architectures to build for (consumed by the HIP toolchain)
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make

ENTRYPOINT ["/app/.devops/tools.sh"]
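A minimal build-and-run sketch for the image above (the tag, model path, and prompt are placeholders; /dev/kfd and /dev/dri are the device nodes a ROCm container needs from the host; --run is the tools.sh dispatch documented for llama.cpp's full images):

# build the fat ROCm image; pass -f if the Dockerfile has a non-default name
docker build -t llama-rocm-full .
# expose the host GPU and run inference through the tools.sh entrypoint
docker run --device /dev/kfd --device /dev/dri \
    -v /path/to/models:/models \
    llama-rocm-full --run -m /models/ggml-model-f32.bin -p "Hello" -n 64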
@@ -0,0 +1,84 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
# Built and maintained by John Boero - [email protected]
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic OpenCL libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp-clblast
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        OpenCL Inference of LLaMA model in C/C++
License:        MIT
Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git mesa-libOpenCL-devel clblast-devel
Requires:       clblast
URL:            https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
OpenCL (CLBlast) inference of Meta's LLaMA 2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j LLAMA_CLBLAST=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p main %{buildroot}%{_bindir}/llamaclblast
cp -p server %{buildroot}%{_bindir}/llamaclblastserver
cp -p simple %{buildroot}%{_bindir}/llamaclblastsimple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamaclblast.service
[Unit]
Description=Llama.cpp server, OpenCL (CLBlast) build.
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llamaclblastserver $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=no

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llamaclblast
%{_bindir}/llamaclblastserver
%{_bindir}/llamaclblastsimple
/usr/lib/systemd/system/llamaclblast.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
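A sketch of packaging from this spec, assuming it is saved as llama.cpp-clblast.spec (the filename is an assumption). rpmbuild does not fetch Source0 itself, so the tarball has to land in SOURCES first; spectool from rpmdevtools does that:

# download Source0 into ~/rpmbuild/SOURCES (default rpmbuild tree)
spectool -g -R llama.cpp-clblast.spec
# build both the binary and source RPMs
rpmbuild -ba llama.cpp-clblast.spec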
@@ -0,0 +1,83 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
# Built and maintained by John Boero - [email protected]
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic OpenCL libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp-cublas
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CUDA (cuBLAS) Inference of LLaMA model in C/C++
License:        MIT
Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git cuda-toolkit
Requires:       cuda-toolkit
URL:            https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CUDA (cuBLAS) inference of Meta's LLaMA 2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j LLAMA_CUBLAS=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p main %{buildroot}%{_bindir}/llamacppcublas
cp -p server %{buildroot}%{_bindir}/llamacppcublasserver
cp -p simple %{buildroot}%{_bindir}/llamacppcublassimple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacublas.service
[Unit]
Description=Llama.cpp server, CUDA (cuBLAS) build.
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llamacppcublasserver $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=no

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llamacppcublas
%{_bindir}/llamacppcublasserver
%{_bindir}/llamacppcublassimple
/usr/lib/systemd/system/llamacublas.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
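Once the RPM is installed, the bundled unit runs like any other service. A sketch under the assumption that /etc/sysconfig/llama still points at the placeholder model path and must be edited first (the replacement path is an example):

# point the server at an actual model file
sudo sed -i 's|/opt/llama2/ggml-model-f32.bin|/opt/models/my-model.bin|' /etc/sysconfig/llama
sudo systemctl daemon-reload
sudo systemctl enable --now llamacublas.service
# follow the server log
journalctl -u llamacublas.service -f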
@@ -0,0 +1,85 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
# Built and maintained by John Boero - [email protected]
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
#    In the meantime, YYYYMMDD format will be used.
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic OpenCL libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License:        MIT
Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git libstdc++-devel
Requires:       libstdc++
URL:            https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference of Meta's LLaMA 2 models using default options.
Models are not included in this package and must be downloaded separately.

%prep
%setup -n llama.cpp-master

%build
make -j

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p main %{buildroot}%{_bindir}/llama
cp -p server %{buildroot}%{_bindir}/llamaserver
cp -p simple %{buildroot}%{_bindir}/llamasimple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llamaserver $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=no

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama
%{_bindir}/llamaserver
%{_bindir}/llamasimple
/usr/lib/systemd/system/llama.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
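The Version tag here is computed at build time from a shell expansion, which is why the notes above call out the YYYYMMDD scheme. A quick way to see what the tags expand to without building anything, assuming the spec is saved as llama.cpp.spec (the filename is an assumption):

# print the expanded Name-Version-Release for today's date
rpmspec -q --qf "%{name}-%{version}-%{release}\n" llama.cpp.spec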
@@ -0,0 +1,44 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the ROCm dev container image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set the GPU architectures to build for (consumed by the HIP toolchain)
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV LLAMA_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make

ENTRYPOINT ["/app/main"]
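Because ROCM_DOCKER_ARCH is a build argument, the fat default can be narrowed to one architecture at build time, which shrinks the compile considerably. A sketch with example values (the tag, arch, and model path are placeholders; gfx1030 corresponds to RDNA2 cards such as the RX 6800/6900):

# build only for gfx1030 instead of the full arch list
docker build --build-arg ROCM_DOCKER_ARCH=gfx1030 -t llama-rocm-main .
# this image's entrypoint is the main binary itself, so arguments go straight to it
docker run --device /dev/kfd --device /dev/dri \
    -v /path/to/models:/models \
    llama-rocm-main -m /models/ggml-model-f32.bin -p "Hello" -n 64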