Skip to content

Commit

Permalink
Merge branch 'master' into cuda-cublas-opts
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Dec 5, 2023
2 parents c830a05 + 5aa365d commit 66a8dd3
Show file tree
Hide file tree
Showing 55 changed files with 2,420 additions and 513 deletions.
4 changes: 4 additions & 0 deletions .devops/tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
./quantize "$@"
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
./main "$@"
elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
./finetune "$@"
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
echo "Converting PTH to GGML..."
for i in `ls $1/$2/ggml-model-f16.bin*`; do
Expand All @@ -34,6 +36,8 @@ else
echo " ex: --outtype f16 \"/models/7B/\" "
echo " --quantize (-q): Optimize with quantization process ggml"
echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
echo " --finetune (-f): Run finetune command to create a lora finetune of the model"
echo " See documentation for finetune for command-line parameters"
echo " --all-in-one (-a): Execute --convert & --quantize"
echo " ex: \"/models/\" 7B"
echo " --server (-s): Run a model on the server"
Expand Down
11 changes: 11 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,17 @@ jobs:
path: |
cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
ios-xcode-build:
runs-on: macos-latest

steps:
- name: Checkout code
uses: actions/checkout@v3

- name: Build Xcode project
run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build


# freeBSD-latest:
# runs-on: macos-12
# steps:
Expand Down
25 changes: 13 additions & 12 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,16 @@ poetry.lock
poetry.toml

# Test binaries
tests/test-grammar-parser
tests/test-llama-grammar
tests/test-double-float
tests/test-grad0
tests/test-opt
tests/test-quantize-fns
tests/test-quantize-perf
tests/test-sampling
tests/test-tokenizer-0-llama
tests/test-tokenizer-0-falcon
tests/test-tokenizer-1-llama
tests/test-tokenizer-1-bpe
/tests/test-grammar-parser
/tests/test-llama-grammar
/tests/test-double-float
/tests/test-grad0
/tests/test-opt
/tests/test-quantize-fns
/tests/test-quantize-perf
/tests/test-sampling
/tests/test-tokenizer-0-llama
/tests/test-tokenizer-0-falcon
/tests/test-tokenizer-1-llama
/tests/test-tokenizer-1-bpe
/tests/test-rope
11 changes: 10 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ else()
endif()

# general
option(BUILD_SHARED_LIBS "build shared libraries" OFF)
option(LLAMA_STATIC "llama: static link libraries" OFF)
option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
option(LLAMA_LTO "llama: enable link time optimization" OFF)
Expand Down Expand Up @@ -100,6 +101,9 @@ option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALO
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_SERVER "llama: build server example" ON)

# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)

#
# Compile flags
#
Expand All @@ -112,6 +116,11 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
include(CheckCXXCompilerFlag)

# enable libstdc++ assertions for debug builds
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
endif()

if (NOT MSVC)
if (LLAMA_SANITIZE_THREAD)
add_compile_options(-fsanitize=thread)
Expand Down Expand Up @@ -161,7 +170,7 @@ if (LLAMA_METAL)
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")

# copy ggml-metal.metal to bin directory
configure_file(ggml-metal.metal bin/ggml-metal.metal COPYONLY)
configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)

set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
${FOUNDATION_LIBRARY}
Expand Down
29 changes: 18 additions & 11 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ BUILD_TARGETS = \
TEST_TARGETS = \
tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt \
tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama \
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe tests/test-rope

# Code coverage output files
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
Expand All @@ -30,7 +30,7 @@ ifeq '' '$(findstring clang,$(shell $(CC) --version))'
CC_VER := $(shell $(CC) -dumpfullversion -dumpversion | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
else
CC_IS_CLANG=1
ifeq '' '$(findstring Apple LLVM,$(shell $(CC) --version))'
ifeq '' '$(findstring Apple,$(shell $(CC) --version))'
CC_IS_LLVM_CLANG=1
else
CC_IS_APPLE_CLANG=1
Expand Down Expand Up @@ -174,6 +174,10 @@ ifdef LLAMA_DEBUG
MK_CFLAGS += -O0 -g
MK_CXXFLAGS += -O0 -g
MK_LDFLAGS += -g

ifeq ($(UNAME_S),Linux)
MK_CXXFLAGS += -Wp,-D_GLIBCXX_ASSERTIONS
endif
else
MK_CPPFLAGS += -DNDEBUG
endif
Expand Down Expand Up @@ -648,7 +652,7 @@ beam-search: examples/beam-search/beam-search.cpp ggml.o llama.o $(COMMON_DEPS)
finetune: examples/finetune/finetune.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

export-lora: examples/export-lora/export-lora.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
export-lora: examples/export-lora/export-lora.cpp ggml.o common/common.h $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

speculative: examples/speculative/speculative.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
Expand Down Expand Up @@ -701,28 +705,28 @@ vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
q8dot: pocs/vdot/q8dot.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)

tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
tests/test-llama-grammar: tests/test-llama-grammar.cpp ggml.o grammar-parser.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o $(COMMON_DEPS) grammar-parser.o $(OBJS)
tests/test-grammar-parser: tests/test-grammar-parser.cpp ggml.o llama.o grammar-parser.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-double-float: tests/test-double-float.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
tests/test-double-float: tests/test-double-float.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-grad0: tests/test-grad0.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
tests/test-grad0: tests/test-grad0.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-opt: tests/test-opt.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
tests/test-opt: tests/test-opt.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
tests/test-quantize-fns: tests/test-quantize-fns.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
tests/test-quantize-perf: tests/test-quantize-perf.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
tests/test-sampling: tests/test-sampling.cpp ggml.o llama.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
Expand All @@ -737,5 +741,8 @@ tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp ggml.o llama.o $(COMM
tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-rope: tests/test-rope.cpp ggml.o $(OBJS)
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)

tests/test-c.o: tests/test-c.c llama.h
$(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
46 changes: 16 additions & 30 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -2,59 +2,45 @@

import PackageDescription

#if arch(arm) || arch(arm64)
let platforms: [SupportedPlatform]? = [
.macOS(.v12),
.iOS(.v14),
.watchOS(.v4),
.tvOS(.v14)
]
let exclude: [String] = []
let resources: [Resource] = [
.process("ggml-metal.metal")
]
let additionalSources: [String] = ["ggml-metal.m"]
let additionalSettings: [CSetting] = [
.unsafeFlags(["-fno-objc-arc"]),
.define("GGML_USE_METAL")
]
#else
let platforms: [SupportedPlatform]? = nil
let exclude: [String] = ["ggml-metal.metal"]
let resources: [Resource] = []
let additionalSources: [String] = []
let additionalSettings: [CSetting] = []
#endif

let package = Package(
name: "llama",
platforms: platforms,
platforms: [
.macOS(.v12),
.iOS(.v14),
.watchOS(.v4),
.tvOS(.v14)
],
products: [
.library(name: "llama", targets: ["llama"]),
],
targets: [
.target(
name: "llama",
path: ".",
exclude: exclude,
exclude: [],
sources: [
"ggml.c",
"llama.cpp",
"ggml-alloc.c",
"ggml-backend.c",
"ggml-quants.c",
] + additionalSources,
resources: resources,
"ggml-metal.m",
],
resources: [
.process("ggml-metal.metal")
],
publicHeadersPath: "spm-headers",
cSettings: [
.unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
.define("GGML_USE_ACCELERATE")
.define("GGML_USE_ACCELERATE"),
.unsafeFlags(["-fno-objc-arc"]),
.define("GGML_USE_METAL"),
// NOTE: NEW_LAPACK will required iOS version 16.4+
// We should consider add this in the future when we drop support for iOS 14
// (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
// .define("ACCELERATE_NEW_LAPACK"),
// .define("ACCELERATE_LAPACK_ILP64")
] + additionalSettings,
],
linkerSettings: [
.linkedFramework("Accelerate")
]
Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ as the main playground for developing new features for the [ggml](https://github
- [nat/openplayground](https://github.com/nat/openplayground)
- [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui)
- [withcatai/catai](https://github.com/withcatai/catai)
- [semperai/amica](https://github.com/semperai/amica)
- [psugihara/FreeChat](https://github.com/psugihara/FreeChat)

---

Expand Down Expand Up @@ -322,7 +324,7 @@ mpirun -hostfile hostfile -n 3 ./main -m ./models/7B/ggml-model-q4_0.gguf -n 128
### BLAS Build
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). BLAS doesn't affect the normal generation performance. There are currently three different implementations of it:
Building the program with BLAS support may lead to some performance improvements in prompt processing using batch sizes higher than 32 (the default is 512). Support with CPU-only BLAS implementations doesn't affect the normal generation performance. We may see generation performance improvements with GPU-involved BLAS implementations, e.g. cuBLAS, hipBLAS and CLBlast. There are currently several different BLAS implementations available for build and use:

- #### Accelerate Framework:

Expand Down Expand Up @@ -894,7 +896,7 @@ Additionally, there the following images, similar to the above:
- `ghcr.io/ggerganov/llama.cpp:full-rocm`: Same as `full` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)
- `ghcr.io/ggerganov/llama.cpp:light-rocm`: Same as `light` but compiled with ROCm support. (platforms: `linux/amd64`, `linux/arm64`)

The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](.devops/) and the Gitlab Action defined in [.github/workflows/docker.yml](.github/workflows/docker.yml). If you need different settings (for example, a different CUDA or ROCm library, you'll need to build the images locally for now).
The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](.github/workflows/docker.yml). If you need different settings (for example, a different CUDA or ROCm library, you'll need to build the images locally for now).
#### Usage
Expand Down
9 changes: 7 additions & 2 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,12 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git")
if(NOT IS_DIRECTORY "${GIT_DIR}")
file(READ ${GIT_DIR} REAL_GIT_DIR_LINK)
string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK})
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
string(FIND "${REAL_GIT_DIR}" "/" SLASH_POS)
if (SLASH_POS EQUAL 0)
set(GIT_DIR "${REAL_GIT_DIR}")
else()
set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}")
endif()
endif()

set(GIT_INDEX "${GIT_DIR}/index")
Expand All @@ -26,7 +31,7 @@ add_custom_command(
COMMENT "Generating build details from Git"
COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
-DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/build-info.cmake"
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/gen-build-info-cpp.cmake"
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
VERBATIM
Expand Down
Loading

0 comments on commit 66a8dd3

Please sign in to comment.