Skip to content

Commit

Permalink
Merge branch 'Mozilla-Ocho:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
tybalex authored Dec 9, 2024
2 parents bfb377e + 9b03e32 commit c04039e
Show file tree
Hide file tree
Showing 762 changed files with 3,095,670 additions and 44,433 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
/cosmocc
/perf.data
/perf.data.old
/trace.json

/*.log
*.DS_Store
Expand Down
19 changes: 15 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,36 +11,42 @@ MAKEFLAGS += --no-builtin-rules
include build/config.mk
include build/rules.mk

include third_party/BUILD.mk
include llamafile/BUILD.mk
include llama.cpp/BUILD.mk
include stable-diffusion.cpp/BUILD.mk
include double-conversion/BUILD.mk
include stb/BUILD.mk
include whisper.cpp/BUILD.mk

# Root package target: `o/$(MODE)/`, which is `o//` by default.
# Asking for a package also builds every sub-package listed beneath it.
.PHONY: o/$(MODE)/
o/$(MODE)/: $(addprefix o/$(MODE)/,	\
		llamafile		\
		llama.cpp		\
		stb			\
		stable-diffusion.cpp	\
		whisper.cpp		\
		third_party		\
		depend.test)

# installs programs and man pages under $(PREFIX), e.g. `make install PREFIX=/usr/local`
.PHONY: install
install: llamafile/zipalign.1 \
llamafile/server/main.1 \
llama.cpp/main/main.1 \
llama.cpp/imatrix/imatrix.1 \
llama.cpp/quantize/quantize.1 \
llama.cpp/perplexity/perplexity.1 \
llama.cpp/llava/llava-quantize.1 \
whisper.cpp/main.1 \
o/$(MODE)/llamafile/zipalign \
o/$(MODE)/llamafile/tokenize \
o/$(MODE)/llama.cpp/main/main \
o/$(MODE)/llama.cpp/imatrix/imatrix \
o/$(MODE)/llama.cpp/quantize/quantize \
o/$(MODE)/llama.cpp/llama-bench/llama-bench \
o/$(MODE)/llama.cpp/perplexity/perplexity \
o/$(MODE)/llama.cpp/llava/llava-quantize
o/$(MODE)/llama.cpp/llava/llava-quantize \
o/$(MODE)/whisper.cpp/main \
o/$(MODE)/llamafile/server/main
mkdir -p $(PREFIX)/bin
$(INSTALL) o/$(MODE)/llamafile/zipalign $(PREFIX)/bin/zipalign
$(INSTALL) o/$(MODE)/llamafile/tokenize $(PREFIX)/bin/llamafile-tokenize
Expand All @@ -52,13 +58,18 @@ install: llamafile/zipalign.1 \
$(INSTALL) build/llamafile-upgrade-engine $(PREFIX)/bin/llamafile-upgrade-engine
$(INSTALL) o/$(MODE)/llama.cpp/perplexity/perplexity $(PREFIX)/bin/llamafile-perplexity
$(INSTALL) o/$(MODE)/llama.cpp/llava/llava-quantize $(PREFIX)/bin/llava-quantize
$(INSTALL) o/$(MODE)/llamafile/server/main $(PREFIX)/bin/llamafiler
$(INSTALL) o/$(MODE)/stable-diffusion.cpp/main $(PREFIX)/bin/sdfile
$(INSTALL) o/$(MODE)/whisper.cpp/main $(PREFIX)/bin/whisperfile
mkdir -p $(PREFIX)/share/man/man1
$(INSTALL) -m 0644 llamafile/zipalign.1 $(PREFIX)/share/man/man1/zipalign.1
$(INSTALL) -m 0644 llamafile/server/main.1 $(PREFIX)/share/man/man1/llamafiler.1
$(INSTALL) -m 0644 llama.cpp/main/main.1 $(PREFIX)/share/man/man1/llamafile.1
$(INSTALL) -m 0644 llama.cpp/imatrix/imatrix.1 $(PREFIX)/share/man/man1/llamafile-imatrix.1
$(INSTALL) -m 0644 llama.cpp/quantize/quantize.1 $(PREFIX)/share/man/man1/llamafile-quantize.1
$(INSTALL) -m 0644 llama.cpp/perplexity/perplexity.1 $(PREFIX)/share/man/man1/llamafile-perplexity.1
$(INSTALL) -m 0644 llama.cpp/llava/llava-quantize.1 $(PREFIX)/share/man/man1/llava-quantize.1
$(INSTALL) -m 0644 whisper.cpp/main.1 $(PREFIX)/share/man/man1/whisperfile.1

.PHONY: check
check: o/$(MODE)/llamafile/check
Expand Down
97 changes: 67 additions & 30 deletions README.md

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions build/config.mk
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,21 @@
#── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘

PREFIX = /usr/local
COSMOCC = .cosmocc/3.5.1
COSMOCC = .cosmocc/3.9.7
TOOLCHAIN = $(COSMOCC)/bin/cosmo

AR = $(TOOLCHAIN)ar
CC = $(TOOLCHAIN)cc
CXX = $(TOOLCHAIN)c++
AR = $(COSMOCC)/bin/ar.ape
ZIPOBJ = $(COSMOCC)/bin/zipobj
MKDEPS = $(COSMOCC)/bin/mkdeps
INSTALL = install

ARFLAGS = rcsD
CXXFLAGS = -frtti -std=gnu++23
CCFLAGS = -g -ggdb -O3 -fexceptions -fsignaling-nans -ffunction-sections -fdata-sections
CCFLAGS = -O2 -g -fexceptions -ffunction-sections -fdata-sections -mclang
CPPFLAGS_ = -iquote. -mcosmo -DGGML_MULTIPLATFORM -Wno-attributes -DLLAMAFILE_DEBUG
TARGET_ARCH = -Xx86_64-mavx -Xx86_64-mtune=znver4
TARGET_ARCH = -Xx86_64-mtune=znver4

TMPDIR = o//tmp
IGNORE := $(shell mkdir -p $(TMPDIR))
Expand Down Expand Up @@ -52,5 +52,5 @@ clean:; rm -rf o
.PHONY: distclean
distclean:; rm -rf o .cosmocc

.cosmocc/3.5.1:
build/download-cosmocc.sh $@ 3.5.1 ea1f47cd4ead6ce3038551be164ad357bd45a4b5b7824871c561d2af23f871d6
# Populate the pinned cosmocc toolchain directory when it does not exist yet.
# build/download-cosmocc.sh is handed the destination directory ($@), the
# release version, and a 64-hex-digit digest.
# NOTE(review): the digest is presumably the expected SHA-256 of the release
# archive -- confirm in build/download-cosmocc.sh.
.cosmocc/3.9.7:
build/download-cosmocc.sh $@ 3.9.7 3f559555d08ece35bab1a66293a2101f359ac9841d563419756efa9c79f7a150
Binary file added build/gperf
Binary file not shown.
16 changes: 13 additions & 3 deletions build/rules.mk
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@ LINK.o = $(CXX) $(CCFLAGS) $(LDFLAGS)
# Compile-only (-c) command lines for C and C++ sources. Each one combines the
# shared CCFLAGS with the language-specific flags (CFLAGS or CXXFLAGS), then
# CPPFLAGS_ followed by CPPFLAGS, and finally TARGET_ARCH. Both are recursive
# (`=`) variables, so target-specific additions to any of these flags are
# picked up when a recipe expands them.
COMPILE.c = $(CC) $(CCFLAGS) $(CFLAGS) $(CPPFLAGS_) $(CPPFLAGS) $(TARGET_ARCH) -c
COMPILE.cc = $(CXX) $(CCFLAGS) $(CXXFLAGS) $(CPPFLAGS_) $(CPPFLAGS) $(TARGET_ARCH) -c

o/$(MODE)/%.a:
$(AR) $(ARFLAGS) $@ $^

# C source in the source tree -> object of the same stem under o/$(MODE)/.
# $(COSMOCC) is a normal prerequisite, so the toolchain directory must be
# present (and is considered for freshness) before the compiler runs.
o/$(MODE)/%.o: %.c $(COSMOCC)
	@mkdir -p $(dir $@)
	$(COMPILE.c) -o $@ $<

# C source that itself lives in the build tree (for example the output of the
# %.gperf rule) -> object placed next to it.
o/$(MODE)/%.o: o/$(MODE)/%.c $(COSMOCC)
	@mkdir -p $(dir $@)
	$(COMPILE.c) -o $@ $<

# C++ source with a .cc suffix -> object of the same stem under o/$(MODE)/,
# built with the C++ compile line.
o/$(MODE)/%.o: %.cc $(COSMOCC)
	@mkdir -p $(dir $@)
	$(COMPILE.cc) -o $@ $<
Expand All @@ -24,6 +25,15 @@ o/$(MODE)/%.o: %.cpp $(COSMOCC)
@mkdir -p $(@D)
$(COMPILE.cc) -o $@ $<

# Generate C source from a gperf description using the checked-in build/gperf
# binary. The result is written into the build tree, where the
# o/$(MODE)/%.o: o/$(MODE)/%.c rule compiles it.
o/$(MODE)/%.c: %.gperf
	@mkdir -p $(dir $@)
	build/gperf --output-file=$@ $<

# Archive rule. The member list comes from the prerequisites each package
# attaches to its own .a target. Two archives are written: the one named by
# the target, and a sibling with the same file name inside a .aarch64/
# subdirectory, assembled from the .aarch64/ counterpart of every prerequisite.
# NOTE(review): presumably the compiler driver leaves an ARM64 object at
# .aarch64/<name>.o next to each object it emits -- confirm against cosmocc.
o/$(MODE)/%.a:
	@mkdir -p $(@D)/.aarch64
	$(AR) $(ARFLAGS) $@ $^
	$(AR) $(ARFLAGS) $(@D)/.aarch64/$(@F) $(join $(addsuffix .aarch64/,$(dir $^)),$(notdir $^))

# Link an executable from the object that shares its stem. $^ expands to every
# prerequisite of the target, so objects and archives that other makefiles add
# to a specific program are passed to the linker as well. LOADLIBES and LDLIBS
# follow the prerequisites on the command line.
o/$(MODE)/%: o/$(MODE)/%.o
$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@

Expand Down
60 changes: 41 additions & 19 deletions llama.cpp/BUILD.mk
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@ LLAMA_CPP_SRCS_C = $(filter %.c,$(LLAMA_CPP_FILES))
LLAMA_CPP_SRCS_CPP = $(filter %.cpp,$(LLAMA_CPP_FILES))
LLAMA_CPP_SRCS = $(LLAMA_CPP_SRCS_C) $(LLAMA_CPP_SRCS_CPP)

LLAMA_CPP_OBJS = \
$(LLAMAFILE_OBJS) \
LLAMA_CPP_SRCS_OBJS = \
$(LLAMA_CPP_SRCS_C:%.c=o/$(MODE)/%.o) \
$(LLAMA_CPP_SRCS_CPP:%.cpp=o/$(MODE)/%.o) \

LLAMA_CPP_OBJS = \
$(LLAMAFILE_OBJS) \
$(LLAMA_CPP_SRCS_OBJS) \
$(LLAMA_CPP_FILES:%=o/$(MODE)/%.zip.o)

o/$(MODE)/llama.cpp/llama.cpp.a: $(LLAMA_CPP_OBJS)
Expand All @@ -26,39 +29,58 @@ include llama.cpp/quantize/BUILD.mk
include llama.cpp/perplexity/BUILD.mk
include llama.cpp/llama-bench/BUILD.mk

$(LLAMA_CPP_OBJS): private \
$(LLAMA_CPP_SRCS_OBJS): private \
CCFLAGS += \
-DNDEBUG \

$(LLAMA_CPP_OBJS): private \
CCFLAGS += \
-DGGML_MULTIPLATFORM \
-DGGML_USE_LLAMAFILE
-DGGML_USE_LLAMAFILE \

o/$(MODE)/llama.cpp/common.o \
o/$(MODE)/llama.cpp/llama.o: private \
CCFLAGS += -O
# Per-object flag override: ggml.o and the listed ggml-vector-* objects get
# -O3 and -mgcc appended to CCFLAGS. `private` stops the addition from being
# inherited by the prerequisites of these objects.
# NOTE(review): -mgcc presumably switches these files back to GCC from the
# -mclang default in build/config.mk -- confirm against the cosmocc docs.
o/$(MODE)/llama.cpp/ggml.o \
o/$(MODE)/llama.cpp/ggml-vector-amd-avx2.o \
o/$(MODE)/llama.cpp/ggml-vector-amd-avx512bf16.o \
o/$(MODE)/llama.cpp/ggml-vector-amd-avx512.o \
o/$(MODE)/llama.cpp/ggml-vector-amd-avx.o \
o/$(MODE)/llama.cpp/ggml-vector-amd-f16c.o \
o/$(MODE)/llama.cpp/ggml-vector-amd-fma.o \
o/$(MODE)/llama.cpp/ggml-vector-arm80.o \
o/$(MODE)/llama.cpp/ggml-vector-arm82.o: \
private CCFLAGS += -O3 -mgcc

o/$(MODE)/llama.cpp/ggml-alloc.o \
o/$(MODE)/llama.cpp/ggml-backend.o \
o/$(MODE)/llama.cpp/grammar-parser.o \
o/$(MODE)/llama.cpp/json-schema-to-grammar.o \
o/$(MODE)/llama.cpp/llama.o \
o/$(MODE)/llama.cpp/vector.o \
o/$(MODE)/llama.cpp/unicode.o \
o/$(MODE)/llama.cpp/sampling.o \
o/$(MODE)/llama.cpp/ggml-alloc.o \
o/$(MODE)/llama.cpp/common.o: private \
CCFLAGS += -Os
o/$(MODE)/llama.cpp/common.o: \
private CCFLAGS += -Os

o/$(MODE)/llama.cpp/unicode-data.o: \
private CCFLAGS += -mgcc

o/$(MODE)/llama.cpp/ggml-quants.o: private CXXFLAGS += -Os
o/$(MODE)/llama.cpp/ggml-quants-amd-avx.o: private TARGET_ARCH += -Xx86_64-mtune=sandybridge
o/$(MODE)/llama.cpp/ggml-quants-amd-avx2.o: private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2
o/$(MODE)/llama.cpp/ggml-quants-amd-avx512.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f
o/$(MODE)/llama.cpp/ggml-quants-amd-k8.o: private TARGET_ARCH += -Xx86_64-mtune=k8
o/$(MODE)/llama.cpp/ggml-quants-amd-ssse3.o: private TARGET_ARCH += -Xx86_64-mtune=core2 -Xx86_64-mssse3
o/$(MODE)/llama.cpp/ggml-quants-amd-avx.o: private TARGET_ARCH += -Xx86_64-mtune=sandybridge -Xx86_64-mavx
o/$(MODE)/llama.cpp/ggml-quants-amd-avx2.o: private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2
o/$(MODE)/llama.cpp/ggml-quants-amd-avx512.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f
o/$(MODE)/llama.cpp/ggml-quants-amd-avx512vl.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f -Xx86_64-mavx512bw -Xx86_64-mavx512dq -Xx86_64-mavx512vl

o/$(MODE)/llama.cpp/ggml-vector.o: private CXXFLAGS += -Os
o/$(MODE)/llama.cpp/ggml-vector-amd-avx.o: private TARGET_ARCH += -Xx86_64-mtune=sandybridge
o/$(MODE)/llama.cpp/ggml-vector-amd-fma.o: private TARGET_ARCH += -Xx86_64-mtune=bdver2 -Xx86_64-mfma
o/$(MODE)/llama.cpp/ggml-vector-amd-f16c.o: private TARGET_ARCH += -Xx86_64-mtune=ivybridge -Xx86_64-mf16c
o/$(MODE)/llama.cpp/ggml-vector-amd-avx2.o: private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2
o/$(MODE)/llama.cpp/ggml-vector-amd-avx512.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f
o/$(MODE)/llama.cpp/ggml-vector-amd-avx512bf16.o: private TARGET_ARCH += -Xx86_64-mtune=znver4 -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f -Xx86_64-mavx512vl -Xx86_64-mavx512bf16
o/$(MODE)/llama.cpp/ggml-vector-amd-k8.o: private TARGET_ARCH += -Xx86_64-mtune=k8
o/$(MODE)/llama.cpp/ggml-vector-amd-ssse3.o: private TARGET_ARCH += -Xx86_64-mtune=core2 -Xx86_64-mssse3
o/$(MODE)/llama.cpp/ggml-vector-amd-avx.o: private TARGET_ARCH += -Xx86_64-mtune=sandybridge -Xx86_64-mavx
o/$(MODE)/llama.cpp/ggml-vector-amd-fma.o: private TARGET_ARCH += -Xx86_64-mtune=bdver2 -Xx86_64-mavx -Xx86_64-mfma
o/$(MODE)/llama.cpp/ggml-vector-amd-f16c.o: private TARGET_ARCH += -Xx86_64-mtune=ivybridge -Xx86_64-mavx -Xx86_64-mf16c
o/$(MODE)/llama.cpp/ggml-vector-amd-avx2.o: private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2
o/$(MODE)/llama.cpp/ggml-vector-amd-avx512.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f
o/$(MODE)/llama.cpp/ggml-vector-amd-avx512vl.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f -Xx86_64-mavx512bw -Xx86_64-mavx512dq -Xx86_64-mavx512vl
o/$(MODE)/llama.cpp/ggml-vector-amd-avx512bf16.o: private TARGET_ARCH += -Xx86_64-mtune=znver4 -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f -Xx86_64-mavx512bw -Xx86_64-mavx512dq -Xx86_64-mavx512vl -Xx86_64-mavx512bf16
o/$(MODE)/llama.cpp/ggml-vector-arm82.o: private TARGET_ARCH += -Xaarch64-march=armv8.2-a+fp16

$(LLAMA_CPP_OBJS): llama.cpp/BUILD.mk
Expand Down
7 changes: 4 additions & 3 deletions llama.cpp/README.llamafile
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@ LICENSE

ORIGIN

https://github.com/ggerganov/llama.cpp/pull/4406/
152da28ae54139e3754189b9e6e1c28e11277502
2024-05-23
https://github.com/ggerganov/llama.cpp/
8b3befc0e2ed8fb18b903735831496b8b0c80949
2024-08-16

LOCAL MODIFICATIONS

- See [jart] and [kawrakow] annotations
- Remove MAP_POPULATE because it makes mmap(tinyllama) block for 100ms
- Refactor ggml.c, llama.cpp, and llava to use llamafile_open() APIs
- Unify main, server, and llava-cli into single llamafile program
Expand Down
Loading

0 comments on commit c04039e

Please sign in to comment.