support run with metal
zhangyikun01 authored and Weaxs committed Dec 12, 2023
Commit 8be2f3b (parent: 29de87d)
Showing 4 changed files with 25 additions and 45 deletions.
Makefile: 46 changes (16 additions, 30 deletions)
@@ -43,8 +43,8 @@ ifeq ($(UNAME_S),Darwin)
ifneq ($(UNAME_P),arm)
SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
ifeq ($(SYSCTL_M),1)
# UNAME_P := arm
# UNAME_M := arm64
UNAME_P := arm
UNAME_M := arm64
warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
endif
endif
@@ -56,30 +56,25 @@ endif

BUILD_TYPE?=
# keep standard at C17 and C++17
CFLAGS = -I. -O3 -DNDEBUG -std=c17 -fPIC -pthread
CXXFLAGS = -I. -O3 -DNDEBUG -std=c++17 -fPIC -pthread
LDFLAGS =
CMAKE_ARGS = -DCMAKE_C_COMPILER=$(shell which gcc) -DCMAKE_CXX_COMPILER=$(shell which g++)
CMAKE_ARGS = -DCMAKE_C_COMPILER=$(shell which cc) -DCMAKE_CXX_COMPILER=$(shell which c++)

# warnings
CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
CXXFLAGS += -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -pedantic-errors

# GPGPU specific
GGML_CUDA_OBJ_PATH=third_party/ggml/src/CMakeFiles/ggml.dir/ggml-cuda.cu.o


# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue
# feel free to update the Makefile for your architecture and send a pull request or issue
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
# Use all CPU extensions that are available:
CFLAGS += -march=native -mtune=native
CXXFLAGS += -march=native -mtune=native
endif
ifneq ($(filter ppc64%,$(UNAME_M)),)
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
ifneq (,$(findstring POWER9,$(POWER9_M)))
CFLAGS += -mcpu=power9
CXXFLAGS += -mcpu=power9
endif
# Require c++23's std::byteswap for big-endian support.
@@ -88,55 +83,46 @@ ifneq ($(filter ppc64%,$(UNAME_M)),)
endif
endif
ifdef CHATGLM_GPROF
CFLAGS += -pg
CXXFLAGS += -pg
endif
ifneq ($(filter aarch64%,$(UNAME_M)),)
CFLAGS += -mcpu=native
CXXFLAGS += -mcpu=native
endif
ifneq ($(filter armv6%,$(UNAME_M)),)
# Raspberry Pi 1, 2, 3
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif
ifneq ($(filter armv7%,$(UNAME_M)),)
# Raspberry Pi 4
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif
ifneq ($(filter armv8%,$(UNAME_M)),)
# Raspberry Pi 4
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

# Build Acceleration
ifeq ($(BUILD_TYPE),cublas)
EXTRA_LIBS=
CMAKE_ARGS+=-DGGML_CUBLAS=ON
endif
ifeq ($(BUILD_TYPE),openblas)
EXTRA_LIBS=
CMAKE_ARGS+=-DGGML_OPENBLAS=ON
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
LDFLAGS += -lopenblas
CXXFLAGS += -I/usr/local/include/openblas -lopenblas
CGO_TAGS=-tags openblas
endif
ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= "/opt/rocm"
CXX="$(ROCM_HOME)"/llvm/bin/clang++
CC="$(ROCM_HOME)"/llvm/bin/clang
EXTRA_LIBS=
GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
GGML_CUDA_OBJ_PATH=CMakeFiles/ggml-rocm.dir/ggml-cuda.cu.o
endif
ifeq ($(BUILD_TYPE),clblas)
EXTRA_LIBS=
CMAKE_ARGS+=-DGGML_CLBLAST=ON
CGO_TAGS=-tags cublas
endif
ifeq ($(BUILD_TYPE),metal)
EXTRA_LIBS=
CMAKE_ARGS+=-DGGML_METAL=ON
CGO_TAGS=-tags metal
EXTRA_TARGETS+=ggml-metal
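With BUILD_TYPE=metal, the build passes -DGGML_METAL=ON to CMake, compiles the Go side with the `metal` build tag (CGO_TAGS=-tags metal), and adds the ggml-metal target, which copies the ggml-metal.metal shader out of the build tree so the Metal backend can find it at runtime. The sketch below is a hypothetical pre-flight check, not part of this commit; it only assumes the shader ends up in the working directory, as the ggml-metal target later in this Makefile arranges.

```go
// Hypothetical pre-flight check (not part of this commit): verify that the
// Metal shader copied by the ggml-metal target is present in the current
// working directory before exercising a metal-tagged build.
package main

import (
	"fmt"
	"os"
)

func main() {
	const shader = "ggml-metal.metal" // copied here by `make BUILD_TYPE=metal`
	if _, err := os.Stat(shader); err != nil {
		fmt.Fprintln(os.Stderr, "Metal shader not found in the working directory:", err)
		os.Exit(1)
	}
	fmt.Println("found", shader, "- a Metal-enabled build should be able to load it")
}
```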
@@ -153,9 +139,7 @@ $(info I chatglm.cpp build info: )
$(info I UNAME_S: $(UNAME_S))
$(info I UNAME_P: $(UNAME_P))
$(info I UNAME_M: $(UNAME_M))
$(info I CFLAGS: $(CFLAGS))
$(info I CXXFLAGS: $(CXXFLAGS))
$(info I LDFLAGS: $(LDFLAGS))
$(info I BUILD_TYPE: $(BUILD_TYPE))
$(info I CMAKE_ARGS: $(CMAKE_ARGS))
$(info I EXTRA_TARGETS: $(EXTRA_TARGETS))
@@ -182,7 +166,7 @@ chatglm.dir: build/chatglm.cpp
# ggml.dir
ggml.dir: build/chatglm.cpp
cd out && mkdir -p ggml.dir && cd ..$(DELIMITER)build && \
$(CP) third_party$(DELIMITER)ggml$(DELIMITER)src$(DELIMITER)CMakeFiles$(DELIMITER)ggml.dir$(DELIMITER)*.c.o ..$(DELIMITER)out$(DELIMITER)ggml.dir$(DELIMITER)
$(CP) third_party$(DELIMITER)ggml$(DELIMITER)src$(DELIMITER)CMakeFiles$(DELIMITER)ggml.dir$(DELIMITER)*.o ..$(DELIMITER)out$(DELIMITER)ggml.dir$(DELIMITER)

# sentencepiece.dir
sentencepiece.dir: build/chatglm.cpp
@@ -202,15 +186,15 @@ absl.dir: sentencepiece.dir

# ggml-metal
ggml-metal: ggml.dir
cd build && $(CP) bin$(DELIMITER)ggml-metal.metal ..$(DELIMITER)
cd build && $(CP) bin/ggml-metal.metal ../

# binding
binding.o: prepare build/chatglm.cpp chatglm.dir ggml.dir sentencepiece.dir protobuf-lite.dir absl.dir
$(CXX) $(CXXFLAGS) \
-I.$(DELIMITER)chatglm.cpp \
-I.$(DELIMITER)chatglm.cpp$(DELIMITER)third_party$(DELIMITER)ggml$(DELIMITER)include$(DELIMITER)ggml \
-I.$(DELIMITER)chatglm.cpp$(DELIMITER)third_party$(DELIMITER)sentencepiece$(DELIMITER)src \
binding.cpp -o binding.o -c $(LDFLAGS)
binding.cpp -MD -MT binding.o -MF binding.d -o binding.o -c

libbinding.a: prepare binding.o $(EXTRA_TARGETS)
ar src libbinding.a \
@@ -221,6 +205,7 @@ libbinding.a: prepare binding.o $(EXTRA_TARGETS)

clean:
rm -rf *.o
rm -rf *.d
rm -rf *.a
rm -rf out
rm -rf build
@@ -236,4 +221,5 @@ windows/ggllm-test-model.bin:
powershell -Command "Invoke-WebRequest -Uri 'https://huggingface.co/Xorbits/chatglm3-6B-GGML/resolve/main/chatglm3-ggml-q4_0.bin' -OutFile 'ggllm-test-model.bin'"

test: $(DOWNLOAD_TARGETS) libbinding.a
TEST_MODEL=ggllm-test-model.bin go test ${CGO_TAGS} -timeout 1800s .
TEST_MODEL=ggllm-test-model.bin go test ${CGO_TAGS} -timeout 1800s -o $PWD/go-chatglm.cpp.test -c -cover \
$PWD/go-chatglm.cpp.test
binding.cpp: 16 changes (4 additions, 12 deletions)
@@ -21,19 +21,9 @@
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif

#ifdef GGML_CUBLAS
#include <ggml-cuda.h>
#endif

#ifdef GGML_METAL
#include <ggml-metal.h>
#endif

#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
void sigint_handler(int signo) {
if (signo == SIGINT) {
@@ -225,12 +215,14 @@ void* create_function(const char* name, const char *arguments) {


void* create_code(const char* input) {
return new chatglm::CodeMessage(input);;
return new chatglm::CodeMessage(input);
}

char* get_model_type(void* pipe_pr) {
chatglm::Pipeline* pipe_p = (chatglm::Pipeline*) pipe_pr;
return strdup(to_string(pipe_p->model->config.model_type).data());
chatglm::ModelLoader loader(pipe_p->mapped_file->data, pipe_p->mapped_file->size);
loader.read_string(4);
return strdup(chatglm::to_string((chatglm::ModelType)loader.read_basic<int>()).data());
}

// copy from chatglm::TextStreamer
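The reworked get_model_type no longer reads the type from the in-memory model config (pipe_p->model->config.model_type); it re-reads the header of the mapped model file with chatglm::ModelLoader, skipping the 4-byte magic and then reading the model type id. The Go sketch below mirrors that header read directly against the file, assuming only the layout visible in the C++ above (a 4-byte magic followed by an int32 type id, little-endian on the platforms targeted here); it is illustrative and not part of the binding API.

```go
// Minimal sketch of the header read performed by get_model_type above:
// skip the 4-byte "ggml" magic, then read the numeric model type id.
package main

import (
	"encoding/binary"
	"fmt"
	"os"
)

func readModelType(path string) (int32, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	var header struct {
		Magic     [4]byte // expected to be "ggml"
		ModelType int32   // numeric model type id, as read by read_basic<int>
	}
	if err := binary.Read(f, binary.LittleEndian, &header); err != nil {
		return 0, err
	}
	return header.ModelType, nil
}

func main() {
	t, err := readModelType("chatglm3-ggml-q4_0.bin")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("model type id:", t)
}
```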
chatglm.go: 2 changes (1 addition, 1 deletion)
@@ -4,7 +4,7 @@ package chatglm
// #cgo CXXFLAGS: -I${SRCDIR}/chatglm.cpp
// #cgo CXXFLAGS: -I${SRCDIR}/chatglm.cpp/third_party/ggml/include/ggml -I${SRCDIR}/chatglm.cpp/third_party/ggml/src
// #cgo CXXFLAGS: -I${SRCDIR}/chatglm.cpp/third_party/sentencepiece/src
// #cgo LDFLAGS: -L${SRCDIR}/ -lbinding -lm -lstdc++
// #cgo LDFLAGS: -L${SRCDIR}/ -lbinding -lm -v
// #cgo darwin LDFLAGS: -framework Accelerate
// #include "binding.h"
// #include <stdlib.h>
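Because BUILD_TYPE=metal sets CGO_TAGS=-tags metal, Metal-specific link flags can live in a file guarded by a build constraint rather than in chatglm.go itself. The sketch below is an illustration only, not taken from this commit, and the framework list is an assumption about what a Metal-enabled link typically requires.

```go
//go:build metal

// Illustrative only (not from this commit): a build-constraint file compiled
// when the package is built with `-tags metal`, i.e. the CGO_TAGS value set
// by BUILD_TYPE=metal. The framework list is an assumption about a typical
// Metal-enabled link, not something defined by this repository.
package chatglm

/*
#cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit
*/
import "C"
```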
chatglm_test.go: 6 changes (4 additions, 2 deletions)
@@ -15,7 +15,7 @@ var (
func setup() {
testModelPath, exist := os.LookupEnv("TEST_MODEL")
if !exist {
testModelPath = "./chatglm3-ggml-q4_0.bin"
testModelPath = "chatglm3-ggml-q4_0.bin"
}

var err error
@@ -153,7 +153,9 @@ func TestCodeInterpreter(t *testing.T) {
if err != nil {
assert.Fail(t, "call code interpreter failed.")
}
messages = append(messages, NewAssistantMsg(ret, modelType))
msg := NewAssistantMsg(ret, modelType)
msg.ToolCalls = append(msg.ToolCalls, &ToolCallMessage{Type: TypeCode, Code: &CodeMessage{Input: "```python\ndef is_prime(n):\n \"\"\"Check if a number is prime.\"\"\"\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\n# Get all prime numbers up to 100\nprimes_upto_100 = [i for i in range(2, 101) if is_prime(i)]\nprimes_upto_100\n```"}})
messages = append(messages, msg)
assert.Contains(t, ret, "好的,我会为您列出100以内的所有质数。\n\n质数是指只能被1和它本身整除的大于1的整数。例如,2、3、5、7等都是质数。\n\n让我们开始吧!")
messages = append(messages, NewObservationMsg("[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]"))

