Merge branch 'Mozilla-Ocho:main' into main

rubra-ai committed on Dec 9, 2024
2 parents: bfb377e + 9b03e32
commit c04039e
Show file tree

Hide file tree

Showing 762 changed files with 3,095,670 additions and 44,433 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,6 +7,7 @@
 /cosmocc
 /perf.data
 /perf.data.old
+/trace.json
 
 /*.log
 *.DS_Store

diff --git a/Makefile b/Makefile
@@ -11,36 +11,42 @@ MAKEFLAGS += --no-builtin-rules
 include build/config.mk
 include build/rules.mk
 
+include third_party/BUILD.mk
 include llamafile/BUILD.mk
 include llama.cpp/BUILD.mk
 include stable-diffusion.cpp/BUILD.mk
-include double-conversion/BUILD.mk
-include stb/BUILD.mk
+include whisper.cpp/BUILD.mk
 
 # the root package is `o//` by default
 # building a package also builds its sub-packages
 .PHONY: o/$(MODE)/
 o/$(MODE)/:	o/$(MODE)/llamafile					\
 		o/$(MODE)/llama.cpp					\
-		o/$(MODE)/stb						\
+		o/$(MODE)/stable-diffusion.cpp				\
+		o/$(MODE)/whisper.cpp					\
+		o/$(MODE)/third_party					\
 		o/$(MODE)/depend.test
 
 # for installing to `make PREFIX=/usr/local`
 .PHONY: install
 install:	llamafile/zipalign.1					\
+		llamafile/server/main.1					\
 		llama.cpp/main/main.1					\
 		llama.cpp/imatrix/imatrix.1				\
 		llama.cpp/quantize/quantize.1				\
 		llama.cpp/perplexity/perplexity.1			\
 		llama.cpp/llava/llava-quantize.1			\
+		whisper.cpp/main.1					\
 		o/$(MODE)/llamafile/zipalign				\
 		o/$(MODE)/llamafile/tokenize				\
 		o/$(MODE)/llama.cpp/main/main				\
 		o/$(MODE)/llama.cpp/imatrix/imatrix			\
 		o/$(MODE)/llama.cpp/quantize/quantize			\
 		o/$(MODE)/llama.cpp/llama-bench/llama-bench		\
 		o/$(MODE)/llama.cpp/perplexity/perplexity		\
-		o/$(MODE)/llama.cpp/llava/llava-quantize
+		o/$(MODE)/llama.cpp/llava/llava-quantize		\
+		o/$(MODE)/whisper.cpp/main				\
+		o/$(MODE)/llamafile/server/main
 	mkdir -p $(PREFIX)/bin
 	$(INSTALL) o/$(MODE)/llamafile/zipalign $(PREFIX)/bin/zipalign
 	$(INSTALL) o/$(MODE)/llamafile/tokenize $(PREFIX)/bin/llamafile-tokenize
@@ -52,13 +58,18 @@ install:	llamafile/zipalign.1					\
 	$(INSTALL) build/llamafile-upgrade-engine $(PREFIX)/bin/llamafile-upgrade-engine
 	$(INSTALL) o/$(MODE)/llama.cpp/perplexity/perplexity $(PREFIX)/bin/llamafile-perplexity
 	$(INSTALL) o/$(MODE)/llama.cpp/llava/llava-quantize $(PREFIX)/bin/llava-quantize
+	$(INSTALL) o/$(MODE)/llamafile/server/main $(PREFIX)/bin/llamafiler
+	$(INSTALL) o/$(MODE)/stable-diffusion.cpp/main $(PREFIX)/bin/sdfile
+	$(INSTALL) o/$(MODE)/whisper.cpp/main $(PREFIX)/bin/whisperfile
 	mkdir -p $(PREFIX)/share/man/man1
 	$(INSTALL) -m 0644 llamafile/zipalign.1 $(PREFIX)/share/man/man1/zipalign.1
+	$(INSTALL) -m 0644 llamafile/server/main.1 $(PREFIX)/share/man/man1/llamafiler.1
 	$(INSTALL) -m 0644 llama.cpp/main/main.1 $(PREFIX)/share/man/man1/llamafile.1
 	$(INSTALL) -m 0644 llama.cpp/imatrix/imatrix.1 $(PREFIX)/share/man/man1/llamafile-imatrix.1
 	$(INSTALL) -m 0644 llama.cpp/quantize/quantize.1 $(PREFIX)/share/man/man1/llamafile-quantize.1
 	$(INSTALL) -m 0644 llama.cpp/perplexity/perplexity.1 $(PREFIX)/share/man/man1/llamafile-perplexity.1
 	$(INSTALL) -m 0644 llama.cpp/llava/llava-quantize.1 $(PREFIX)/share/man/man1/llava-quantize.1
+	$(INSTALL) -m 0644 whisper.cpp/main.1 $(PREFIX)/share/man/man1/whisperfile.1
 
 .PHONY: check
 check: o/$(MODE)/llamafile/check

diff --git a/README.md b/README.md
diff --git a/build/config.mk b/build/config.mk
@@ -2,21 +2,21 @@
 #── vi: set noet ft=make ts=8 sw=8 fenc=utf-8 :vi ────────────────────┘
 
 PREFIX = /usr/local
-COSMOCC = .cosmocc/3.5.1
+COSMOCC = .cosmocc/3.9.7
 TOOLCHAIN = $(COSMOCC)/bin/cosmo
 
-AR = $(TOOLCHAIN)ar
 CC = $(TOOLCHAIN)cc
 CXX = $(TOOLCHAIN)c++
+AR = $(COSMOCC)/bin/ar.ape
 ZIPOBJ = $(COSMOCC)/bin/zipobj
 MKDEPS = $(COSMOCC)/bin/mkdeps
 INSTALL = install
 
 ARFLAGS = rcsD
 CXXFLAGS = -frtti -std=gnu++23
-CCFLAGS = -g -ggdb -O3 -fexceptions -fsignaling-nans -ffunction-sections -fdata-sections
+CCFLAGS = -O2 -g -fexceptions -ffunction-sections -fdata-sections -mclang
 CPPFLAGS_ = -iquote. -mcosmo -DGGML_MULTIPLATFORM -Wno-attributes -DLLAMAFILE_DEBUG
-TARGET_ARCH = -Xx86_64-mavx -Xx86_64-mtune=znver4
+TARGET_ARCH = -Xx86_64-mtune=znver4
 
 TMPDIR = o//tmp
 IGNORE := $(shell mkdir -p $(TMPDIR))
@@ -52,5 +52,5 @@ clean:; rm -rf o
 .PHONY: distclean
 distclean:; rm -rf o .cosmocc
 
-.cosmocc/3.5.1:
-	build/download-cosmocc.sh $@ 3.5.1 ea1f47cd4ead6ce3038551be164ad357bd45a4b5b7824871c561d2af23f871d6
+.cosmocc/3.9.7:
+	build/download-cosmocc.sh $@ 3.9.7 3f559555d08ece35bab1a66293a2101f359ac9841d563419756efa9c79f7a150
diff --git a/build/gperf b/build/gperf
diff --git a/build/rules.mk b/build/rules.mk
@@ -5,13 +5,14 @@ LINK.o = $(CXX) $(CCFLAGS) $(LDFLAGS)
 COMPILE.c = $(CC) $(CCFLAGS) $(CFLAGS) $(CPPFLAGS_) $(CPPFLAGS) $(TARGET_ARCH) -c
 COMPILE.cc = $(CXX) $(CCFLAGS) $(CXXFLAGS) $(CPPFLAGS_) $(CPPFLAGS) $(TARGET_ARCH) -c
 
-o/$(MODE)/%.a:
-	$(AR) $(ARFLAGS) $@ $^
-
 o/$(MODE)/%.o: %.c $(COSMOCC)
 	@mkdir -p $(@D)
 	$(COMPILE.c) -o $@ $<
 
+o/$(MODE)/%.o: o/$(MODE)/%.c $(COSMOCC)
+	@mkdir -p $(@D)
+	$(COMPILE.c) -o $@ $<
+
 o/$(MODE)/%.o: %.cc $(COSMOCC)
 	@mkdir -p $(@D)
 	$(COMPILE.cc) -o $@ $<
@@ -24,6 +25,15 @@ o/$(MODE)/%.o: %.cpp $(COSMOCC)
 	@mkdir -p $(@D)
 	$(COMPILE.cc) -o $@ $<
 
+o/$(MODE)/%.c: %.gperf
+	@mkdir -p $(@D)
+	build/gperf --output-file=$@ $<
+
+o/$(MODE)/%.a:
+	@mkdir -p $(dir $@)/.aarch64
+	$(AR) $(ARFLAGS) $@ $^
+	$(AR) $(ARFLAGS) $(dir $@)/.aarch64/$(notdir $@) $(foreach x,$^,$(dir $(x)).aarch64/$(notdir $(x)))
+
 o/$(MODE)/%: o/$(MODE)/%.o
 	$(LINK.o) $^ $(LOADLIBES) $(LDLIBS) -o $@
 

diff --git a/llama.cpp/BUILD.mk b/llama.cpp/BUILD.mk
@@ -10,10 +10,13 @@ LLAMA_CPP_SRCS_C = $(filter %.c,$(LLAMA_CPP_FILES))
 LLAMA_CPP_SRCS_CPP = $(filter %.cpp,$(LLAMA_CPP_FILES))
 LLAMA_CPP_SRCS = $(LLAMA_CPP_SRCS_C) $(LLAMA_CPP_SRCS_CPP)
 
-LLAMA_CPP_OBJS =					\
-	$(LLAMAFILE_OBJS)				\
+LLAMA_CPP_SRCS_OBJS =					\
 	$(LLAMA_CPP_SRCS_C:%.c=o/$(MODE)/%.o)		\
 	$(LLAMA_CPP_SRCS_CPP:%.cpp=o/$(MODE)/%.o)	\
+
+LLAMA_CPP_OBJS =					\
+	$(LLAMAFILE_OBJS)				\
+	$(LLAMA_CPP_SRCS_OBJS)				\
 	$(LLAMA_CPP_FILES:%=o/$(MODE)/%.zip.o)
 
 o/$(MODE)/llama.cpp/llama.cpp.a: $(LLAMA_CPP_OBJS)
@@ -26,39 +29,58 @@ include llama.cpp/quantize/BUILD.mk
 include llama.cpp/perplexity/BUILD.mk
 include llama.cpp/llama-bench/BUILD.mk
 
-$(LLAMA_CPP_OBJS): private				\
+$(LLAMA_CPP_SRCS_OBJS): private				\
 		CCFLAGS +=				\
 			-DNDEBUG			\
+
+$(LLAMA_CPP_OBJS): private				\
+		CCFLAGS +=				\
 			-DGGML_MULTIPLATFORM		\
-			-DGGML_USE_LLAMAFILE
+			-DGGML_USE_LLAMAFILE		\
 
-o/$(MODE)/llama.cpp/common.o				\
-o/$(MODE)/llama.cpp/llama.o: private			\
-		CCFLAGS += -O
+o/$(MODE)/llama.cpp/ggml.o \
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx2.o \
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx512bf16.o \
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx512.o \
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx.o \
+o/$(MODE)/llama.cpp/ggml-vector-amd-f16c.o \
+o/$(MODE)/llama.cpp/ggml-vector-amd-fma.o \
+o/$(MODE)/llama.cpp/ggml-vector-arm80.o \
+o/$(MODE)/llama.cpp/ggml-vector-arm82.o: \
+		private CCFLAGS += -O3 -mgcc
 
 o/$(MODE)/llama.cpp/ggml-alloc.o			\
 o/$(MODE)/llama.cpp/ggml-backend.o			\
 o/$(MODE)/llama.cpp/grammar-parser.o			\
 o/$(MODE)/llama.cpp/json-schema-to-grammar.o		\
-o/$(MODE)/llama.cpp/llama.o				\
+o/$(MODE)/llama.cpp/vector.o				\
 o/$(MODE)/llama.cpp/unicode.o				\
 o/$(MODE)/llama.cpp/sampling.o				\
 o/$(MODE)/llama.cpp/ggml-alloc.o			\
-o/$(MODE)/llama.cpp/common.o: private			\
-		CCFLAGS += -Os
+o/$(MODE)/llama.cpp/common.o:				\
+		private CCFLAGS += -Os
+
+o/$(MODE)/llama.cpp/unicode-data.o:			\
+		private CCFLAGS += -mgcc
 
 o/$(MODE)/llama.cpp/ggml-quants.o: private CXXFLAGS += -Os
-o/$(MODE)/llama.cpp/ggml-quants-amd-avx.o: private TARGET_ARCH += -Xx86_64-mtune=sandybridge
-o/$(MODE)/llama.cpp/ggml-quants-amd-avx2.o: private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2
-o/$(MODE)/llama.cpp/ggml-quants-amd-avx512.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f
+o/$(MODE)/llama.cpp/ggml-quants-amd-k8.o: private TARGET_ARCH += -Xx86_64-mtune=k8
+o/$(MODE)/llama.cpp/ggml-quants-amd-ssse3.o: private TARGET_ARCH += -Xx86_64-mtune=core2 -Xx86_64-mssse3
+o/$(MODE)/llama.cpp/ggml-quants-amd-avx.o: private TARGET_ARCH += -Xx86_64-mtune=sandybridge -Xx86_64-mavx
+o/$(MODE)/llama.cpp/ggml-quants-amd-avx2.o: private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2
+o/$(MODE)/llama.cpp/ggml-quants-amd-avx512.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f
+o/$(MODE)/llama.cpp/ggml-quants-amd-avx512vl.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f -Xx86_64-mavx512bw -Xx86_64-mavx512dq -Xx86_64-mavx512vl
 
 o/$(MODE)/llama.cpp/ggml-vector.o: private CXXFLAGS += -Os
-o/$(MODE)/llama.cpp/ggml-vector-amd-avx.o: private TARGET_ARCH += -Xx86_64-mtune=sandybridge
-o/$(MODE)/llama.cpp/ggml-vector-amd-fma.o: private TARGET_ARCH += -Xx86_64-mtune=bdver2 -Xx86_64-mfma
-o/$(MODE)/llama.cpp/ggml-vector-amd-f16c.o: private TARGET_ARCH += -Xx86_64-mtune=ivybridge -Xx86_64-mf16c
-o/$(MODE)/llama.cpp/ggml-vector-amd-avx2.o: private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2
-o/$(MODE)/llama.cpp/ggml-vector-amd-avx512.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f
-o/$(MODE)/llama.cpp/ggml-vector-amd-avx512bf16.o: private TARGET_ARCH += -Xx86_64-mtune=znver4 -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f -Xx86_64-mavx512vl -Xx86_64-mavx512bf16
+o/$(MODE)/llama.cpp/ggml-vector-amd-k8.o: private TARGET_ARCH += -Xx86_64-mtune=k8
+o/$(MODE)/llama.cpp/ggml-vector-amd-ssse3.o: private TARGET_ARCH += -Xx86_64-mtune=core2 -Xx86_64-mssse3
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx.o: private TARGET_ARCH += -Xx86_64-mtune=sandybridge -Xx86_64-mavx
+o/$(MODE)/llama.cpp/ggml-vector-amd-fma.o: private TARGET_ARCH += -Xx86_64-mtune=bdver2 -Xx86_64-mavx -Xx86_64-mfma
+o/$(MODE)/llama.cpp/ggml-vector-amd-f16c.o: private TARGET_ARCH += -Xx86_64-mtune=ivybridge -Xx86_64-mavx -Xx86_64-mf16c
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx2.o: private TARGET_ARCH += -Xx86_64-mtune=skylake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx512.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx512vl.o: private TARGET_ARCH += -Xx86_64-mtune=cannonlake -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f -Xx86_64-mavx512bw -Xx86_64-mavx512dq -Xx86_64-mavx512vl
+o/$(MODE)/llama.cpp/ggml-vector-amd-avx512bf16.o: private TARGET_ARCH += -Xx86_64-mtune=znver4 -Xx86_64-mavx -Xx86_64-mf16c -Xx86_64-mfma -Xx86_64-mavx2 -Xx86_64-mavx512f -Xx86_64-mavx512bw -Xx86_64-mavx512dq -Xx86_64-mavx512vl -Xx86_64-mavx512bf16
 o/$(MODE)/llama.cpp/ggml-vector-arm82.o: private TARGET_ARCH += -Xaarch64-march=armv8.2-a+fp16
 
 $(LLAMA_CPP_OBJS): llama.cpp/BUILD.mk

diff --git a/llama.cpp/README.llamafile b/llama.cpp/README.llamafile
@@ -8,12 +8,13 @@ LICENSE
 
 ORIGIN
 
-  https://github.com/ggerganov/llama.cpp/pull/4406/
-  152da28ae54139e3754189b9e6e1c28e11277502
-  2024-05-23
+  https://github.com/ggerganov/llama.cpp/
+  8b3befc0e2ed8fb18b903735831496b8b0c80949
+  2024-08-16
 
 LOCAL MODIFICATIONS
 
+  - See [jart] and [kawrakow] annotations
   - Remove MAP_POPULATE because it makes mmap(tinyllama) block for 100ms
   - Refactor ggml.c, llama.cpp, and llava to use llamafile_open() APIs
   - Unify main, server, and llava-cli into single llamafile program