Commit ccbcce0

Merge branch 'master' into layla-build
l3utterfly committed Sep 15, 2024
2 parents dd3efc2 + 3c7989f commit ccbcce0
Showing 88 changed files with 4,541 additions and 3,542 deletions.
16 changes: 8 additions & 8 deletions .github/workflows/build.yml
@@ -375,7 +375,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4

- name: Dependencies
id: depends
@@ -401,7 +401,7 @@ jobs:
continue-on-error: true

steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4

- name: add oneAPI to apt
shell: bash
@@ -442,7 +442,7 @@ jobs:
continue-on-error: true

steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v4

- name: add oneAPI to apt
shell: bash
@@ -546,7 +546,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v1
+ uses: actions/checkout@v4

- name: Dependencies
id: depends
@@ -576,7 +576,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v1
+ uses: actions/checkout@v4

- name: Dependencies
id: depends
@@ -610,7 +610,7 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v1
+ uses: actions/checkout@v4

- name: Dependencies
id: depends
@@ -969,14 +969,14 @@ jobs:
steps:
- name: Clone
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4

- name: Install
id: depends
run: |
$ErrorActionPreference = "Stop"
write-host "Downloading AMD HIP SDK Installer"
- Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-23.Q4-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
write-host "Installing AMD HIP SDK"
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
write-host "Completed AMD HIP SDK installation"
1 change: 1 addition & 0 deletions .github/workflows/server.yml
@@ -173,6 +173,7 @@ jobs:
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
run: |
cd examples/server/tests
+ $env:PYTHONIOENCODING = ":replace"
behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
- name: Slow tests
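Note on the server.yml change: PYTHONIOENCODING takes the form encoding:errorhandler, and a value starting with the colon leaves the default stream encoding alone while switching the error handler to "replace", so the behave run prints "?" for characters the Windows console code page cannot encode instead of failing with a UnicodeEncodeError. A minimal Python sketch of that effect (hypothetical child script, not part of the repository):

    # Run a child interpreter with PYTHONIOENCODING=":replace": the empty part
    # before the colon keeps the default encoding, the part after it selects
    # the "replace" error handler for stdin/stdout/stderr.
    import os
    import subprocess
    import sys

    env = {**os.environ, "PYTHONIOENCODING": ":replace"}
    result = subprocess.run(
        [sys.executable, "-c", "print('llama \\U0001F999')"],
        env=env,
    )
    print(result.returncode)  # 0 even on a console that cannot encode the emoji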
8 changes: 7 additions & 1 deletion CMakeLists.txt
@@ -139,10 +139,16 @@ set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location o
# determining _precisely_ which defines are necessary for the llama-config
# package.
#
+ set(GGML_TRANSIENT_DEFINES)
get_target_property(GGML_DIRECTORY ggml SOURCE_DIR)
get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS)
+ if (GGML_DIR_DEFINES)
+     list(APPEND GGML_TRANSIENT_DEFINES ${GGML_DIR_DEFINES})
+ endif()
get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS)
- set(GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES} ${GGML_DIR_DEFINES})
+ if (GGML_TARGET_DEFINES)
+     list(APPEND GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES})
+ endif()
get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)
get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES)

set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h)
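Note on the CMakeLists.txt hunk: get_target_property() stores a <VAR>-NOTFOUND placeholder (and get_directory_property() an empty string) when the queried property is unset, so the old single set() could copy a literal GGML_TARGET_DEFINES-NOTFOUND token into GGML_TRANSIENT_DEFINES. The new if() guards rely on CMake treating empty values and values ending in -NOTFOUND as false. A standalone sketch of the same pattern, using a hypothetical "demo" target:

    # Guarded-append pattern: DEMO_DEFINES becomes "DEMO_DEFINES-NOTFOUND"
    # because the property is unset, if() evaluates that as false, and the
    # placeholder never reaches the list.
    cmake_minimum_required(VERSION 3.14)
    project(transient-defines-demo NONE)

    add_custom_target(demo)  # deliberately has no COMPILE_DEFINITIONS

    set(TRANSIENT_DEFINES)
    get_target_property(DEMO_DEFINES demo COMPILE_DEFINITIONS)
    if (DEMO_DEFINES)
        list(APPEND TRANSIENT_DEFINES ${DEMO_DEFINES})
    endif()

    message(STATUS "TRANSIENT_DEFINES = '${TRANSIENT_DEFINES}'")  # stays empty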
17 changes: 14 additions & 3 deletions Makefile
@@ -434,7 +434,7 @@ endif
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue

- ifndef RISCV
+ ifndef RISCV_CROSS_COMPILE

ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
# Use all CPU extensions that are available:
@@ -514,7 +514,12 @@ ifneq ($(filter loongarch64%,$(UNAME_M)),)
MK_CXXFLAGS += -mlasx
endif

- else
+ ifneq ($(filter riscv64%,$(UNAME_M)),)
+ 	MK_CFLAGS += -march=rv64gcv -mabi=lp64d
+ 	MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
+ endif
+
+ else # RISC-V CROSS COMPILATION
MK_CFLAGS += -march=rv64gcv -mabi=lp64d
MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
endif
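Note on this Makefile hunk: the old ifndef RISCV guard is renamed to RISCV_CROSS_COMPILE, and a native riscv64 host now receives the vector/ABI flags through the regular uname -m detection, while defining RISCV_CROSS_COMPILE on the make command line still takes the dedicated cross-compilation branch. A self-contained sketch of that control flow (hypothetical DEMO_CFLAGS variable and "show" target):

    # Without RISCV_CROSS_COMPILE the flags come from host detection; with
    # `make RISCV_CROSS_COMPILE=1` the host check is skipped and the same
    # RISC-V flags are applied for a cross toolchain.
    UNAME_M := $(shell uname -m)

    ifndef RISCV_CROSS_COMPILE
    ifneq ($(filter riscv64%,$(UNAME_M)),)
    DEMO_CFLAGS += -march=rv64gcv -mabi=lp64d
    endif
    else # RISC-V cross compilation
    DEMO_CFLAGS += -march=rv64gcv -mabi=lp64d
    endif

    show: ; @echo "DEMO_CFLAGS = $(DEMO_CFLAGS)"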
@@ -925,6 +930,7 @@ OBJ_LLAMA = \

OBJ_COMMON = \
common/common.o \
+ common/arg.o \
common/console.o \
common/ngram-cache.o \
common/sampling.o \
@@ -1157,6 +1163,11 @@ common/common.o: \
include/llama.h
$(CXX) $(CXXFLAGS) -c $< -o $@

+ common/arg.o: \
+ 	common/arg.cpp \
+ 	common/arg.h
+ 	$(CXX) $(CXXFLAGS) -c $< -o $@

common/sampling.o: \
common/sampling.cpp \
common/sampling.h \
@@ -1429,6 +1440,7 @@ llama-server: \
examples/server/system-prompts.js.hpp \
examples/server/prompt-formats.js.hpp \
examples/server/json-schema-to-grammar.mjs.hpp \
+ examples/server/loading.html.hpp \
common/json.hpp \
common/stb_image.h \
$(OBJ_ALL)
@@ -1448,7 +1460,6 @@ llama-gen-docs: examples/gen-docs/gen-docs.cpp \
$(OBJ_ALL)
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
- ./llama-gen-docs

libllava.a: examples/llava/llava.cpp \
examples/llava/llava.h \
5 changes: 4 additions & 1 deletion README.md
@@ -17,7 +17,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)

## Hot topics

- - *add hot topics here*
+ - Huggingface GGUF editor: [discussion](https://github.com/ggerganov/llama.cpp/discussions/9268) | [tool](https://huggingface.co/spaces/CISCai/gguf-editor)

----

@@ -89,6 +89,7 @@ Typically finetunes of the base models below are supported as well.
- [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966)
- [x] [EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct)
- [x] [FalconMamba Models](https://huggingface.co/collections/tiiuae/falconmamba-7b-66b9a580324dd1598b0f6d4a)
+ - [x] [Jais](https://huggingface.co/inceptionai/jais-13b-chat)

(instructions for supporting more models: [HOWTO-add-model.md](./docs/development/HOWTO-add-model.md))

@@ -163,6 +164,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
- [AI Sublime Text plugin](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (MIT)
- [AIKit](https://github.com/sozercan/aikit) (MIT)
- [LARS - The LLM & Advanced Referencing Solution](https://github.com/abgulati/LARS) (AGPL)
+ - [LLMUnity](https://github.com/undreamai/LLMUnity) (MIT)

*(to have a project listed here, it should clearly state that it depends on `llama.cpp`)*

@@ -171,6 +173,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
- [akx/ggify](https://github.com/akx/ggify) – download PyTorch models from HuggingFace Hub and convert them to GGML
- [crashr/gppm](https://github.com/crashr/gppm) – launch llama.cpp instances utilizing NVIDIA Tesla P40 or P100 GPUs with reduced idle power consumption
- [gpustack/gguf-parser](https://github.com/gpustack/gguf-parser-go/tree/main/cmd/gguf-parser) - review/check the GGUF file and estimate the memory usage
+ - [Styled Lines](https://marketplace.unity.com/packages/tools/generative-ai/styled-lines-llama-cpp-model-292902) (proprietary licensed, async wrapper of inference part for game development in Unity3d with prebuild Mobile and Web platform wrappers and a model example)

**Infrastructure:**

2 changes: 2 additions & 0 deletions common/CMakeLists.txt
@@ -54,6 +54,8 @@ add_library(${TARGET} STATIC
base64.hpp
common.h
common.cpp
+ arg.h
+ arg.cpp
sampling.h
sampling.cpp
console.h