support run with metal
zhangyikun01 authored and Weaxs committed Dec 12, 2023
Commit 8be2f3b (parent: 29de87d)
Showing 4 changed files with 25 additions and 45 deletions.
Makefile: 46 changes (16 additions, 30 deletions)
@@ -43,8 +43,8 @@ ifeq ($(UNAME_S),Darwin)
ifneq ($(UNAME_P),arm)
SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
ifeq ($(SYSCTL_M),1)
# UNAME_P := arm
# UNAME_M := arm64
UNAME_P := arm
UNAME_M := arm64
warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
endif
endif
@@ -56,30 +56,25 @@ endif

BUILD_TYPE?=
# keep standard at C17 and C++17
CFLAGS = -I. -O3 -DNDEBUG -std=c17 -fPIC -pthread
CXXFLAGS = -I. -O3 -DNDEBUG -std=c++17 -fPIC -pthread
LDFLAGS =
CMAKE_ARGS = -DCMAKE_C_COMPILER=$(shell which gcc) -DCMAKE_CXX_COMPILER=$(shell which g++)
CMAKE_ARGS = -DCMAKE_C_COMPILER=$(shell which cc) -DCMAKE_CXX_COMPILER=$(shell which c++)

# warnings
CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith -Wno-unused-function
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function
CXXFLAGS += -g -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -pedantic-errors

# GPGPU specific
GGML_CUDA_OBJ_PATH=third_party/ggml/src/CMakeFiles/ggml.dir/ggml-cuda.cu.o


# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
# feel free to update the Makefile for your architecture and send a pull request or issue
# feel free to update the Makefile for your architecture and send a pull request or issue
ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
# Use all CPU extensions that are available:
CFLAGS += -march=native -mtune=native
CXXFLAGS += -march=native -mtune=native
endif
ifneq ($(filter ppc64%,$(UNAME_M)),)
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
ifneq (,$(findstring POWER9,$(POWER9_M)))
CFLAGS += -mcpu=power9
CXXFLAGS += -mcpu=power9
endif
# Require c++23's std::byteswap for big-endian support.
@@ -88,55 +83,46 @@ ifneq ($(filter ppc64%,$(UNAME_M)),)
endif
endif
ifdef CHATGLM_GPROF
CFLAGS += -pg
CXXFLAGS += -pg
endif
ifneq ($(filter aarch64%,$(UNAME_M)),)
CFLAGS += -mcpu=native
CXXFLAGS += -mcpu=native
endif
ifneq ($(filter armv6%,$(UNAME_M)),)
# Raspberry Pi 1, 2, 3
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif
ifneq ($(filter armv7%,$(UNAME_M)),)
# Raspberry Pi 4
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif
ifneq ($(filter armv8%,$(UNAME_M)),)
# Raspberry Pi 4
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

# Build Acceleration
ifeq ($(BUILD_TYPE),cublas)
EXTRA_LIBS=
CMAKE_ARGS+=-DGGML_CUBLAS=ON
endif
ifeq ($(BUILD_TYPE),openblas)
EXTRA_LIBS=
CMAKE_ARGS+=-DGGML_OPENBLAS=ON
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas
LDFLAGS += -lopenblas
CXXFLAGS += -I/usr/local/include/openblas -lopenblas
CGO_TAGS=-tags openblas
endif
ifeq ($(BUILD_TYPE),hipblas)
ROCM_HOME ?= "/opt/rocm"
CXX="$(ROCM_HOME)"/llvm/bin/clang++
CC="$(ROCM_HOME)"/llvm/bin/clang
EXTRA_LIBS=
GPU_TARGETS ?= gfx900,gfx90a,gfx1030,gfx1031,gfx1100
AMDGPU_TARGETS ?= "$(GPU_TARGETS)"
CMAKE_ARGS+=-DGGML_HIPBLAS=ON -DAMDGPU_TARGETS="$(AMDGPU_TARGETS)" -DGPU_TARGETS="$(GPU_TARGETS)"
GGML_CUDA_OBJ_PATH=CMakeFiles/ggml-rocm.dir/ggml-cuda.cu.o
endif
ifeq ($(BUILD_TYPE),clblas)
EXTRA_LIBS=
CMAKE_ARGS+=-DGGML_CLBLAST=ON
CGO_TAGS=-tags cublas
endif
ifeq ($(BUILD_TYPE),metal)
EXTRA_LIBS=
CMAKE_ARGS+=-DGGML_METAL=ON
CGO_TAGS=-tags metal
EXTRA_TARGETS+=ggml-metal
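With BUILD_TYPE=metal, the build passes -DGGML_METAL=ON to CMake, compiles the Go side with the `metal` build tag (CGO_TAGS=-tags metal), and adds the ggml-metal target, which copies the ggml-metal.metal shader out of the build tree so the Metal backend can find it at runtime. The sketch below is a hypothetical pre-flight check, not part of this commit; it only assumes the shader ends up in the working directory, as the ggml-metal target later in this Makefile arranges.

```go
// Hypothetical pre-flight check (not part of this commit): verify that the
// Metal shader copied by the ggml-metal target is present in the current
// working directory before exercising a metal-tagged build.
package main

import (
	"fmt"
	"os"
)

func main() {
	const shader = "ggml-metal.metal" // copied here by `make BUILD_TYPE=metal`
	if _, err := os.Stat(shader); err != nil {
		fmt.Fprintln(os.Stderr, "Metal shader not found in the working directory:", err)
		os.Exit(1)
	}
	fmt.Println("found", shader, "- a Metal-enabled build should be able to load it")
}
```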
@@ -153,9 +139,7 @@ $(info I chatglm.cpp build info: )
$(info I UNAME_S: $(UNAME_S))
$(info I UNAME_P: $(UNAME_P))
$(info I UNAME_M: $(UNAME_M))
$(info I CFLAGS: $(CFLAGS))
$(info I CXXFLAGS: $(CXXFLAGS))
$(info I LDFLAGS: $(LDFLAGS))
$(info I BUILD_TYPE: $(BUILD_TYPE))
$(info I CMAKE_ARGS: $(CMAKE_ARGS))
$(info I EXTRA_TARGETS: $(EXTRA_TARGETS))
@@ -182,7 +166,7 @@ chatglm.dir: build/chatglm.cpp
# ggml.dir
ggml.dir: build/chatglm.cpp
cd out && mkdir -p ggml.dir && cd ..$(DELIMITER)build && \
$(CP) third_party$(DELIMITER)ggml$(DELIMITER)src$(DELIMITER)CMakeFiles$(DELIMITER)ggml.dir$(DELIMITER)*.c.o ..$(DELIMITER)out$(DELIMITER)ggml.dir$(DELIMITER)
$(CP) third_party$(DELIMITER)ggml$(DELIMITER)src$(DELIMITER)CMakeFiles$(DELIMITER)ggml.dir$(DELIMITER)*.o ..$(DELIMITER)out$(DELIMITER)ggml.dir$(DELIMITER)

# sentencepiece.dir
sentencepiece.dir: build/chatglm.cpp
@@ -202,15 +186,15 @@ absl.dir: sentencepiece.dir

# ggml-metal
ggml-metal: ggml.dir
cd build && $(CP) bin$(DELIMITER)ggml-metal.metal ..$(DELIMITER)
cd build && $(CP) bin/ggml-metal.metal ../

# binding
binding.o: prepare build/chatglm.cpp chatglm.dir ggml.dir sentencepiece.dir protobuf-lite.dir absl.dir
$(CXX) $(CXXFLAGS) \
-I.$(DELIMITER)chatglm.cpp \
-I.$(DELIMITER)chatglm.cpp$(DELIMITER)third_party$(DELIMITER)ggml$(DELIMITER)include$(DELIMITER)ggml \
-I.$(DELIMITER)chatglm.cpp$(DELIMITER)third_party$(DELIMITER)sentencepiece$(DELIMITER)src \
binding.cpp -o binding.o -c $(LDFLAGS)
binding.cpp -MD -MT binding.o -MF binding.d -o binding.o -c

libbinding.a: prepare binding.o $(EXTRA_TARGETS)
ar src libbinding.a \
@@ -221,6 +205,7 @@ libbinding.a: prepare binding.o $(EXTRA_TARGETS)

clean:
rm -rf *.o
rm -rf *.d
rm -rf *.a
rm -rf out
rm -rf build
@@ -236,4 +221,5 @@ windows/ggllm-test-model.bin:
powershell -Command "Invoke-WebRequest -Uri 'https://huggingface.co/Xorbits/chatglm3-6B-GGML/resolve/main/chatglm3-ggml-q4_0.bin' -OutFile 'ggllm-test-model.bin'"

test: $(DOWNLOAD_TARGETS) libbinding.a
TEST_MODEL=ggllm-test-model.bin go test ${CGO_TAGS} -timeout 1800s .
TEST_MODEL=ggllm-test-model.bin go test ${CGO_TAGS} -timeout 1800s -o $PWD/go-chatglm.cpp.test -c -cover \
$PWD/go-chatglm.cpp.test
binding.cpp: 16 changes (4 additions, 12 deletions)
@@ -21,19 +21,9 @@
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <fcntl.h>
#include <io.h>
#include <windows.h>
#endif

#ifdef GGML_CUBLAS
#include <ggml-cuda.h>
#endif

#ifdef GGML_METAL
#include <ggml-metal.h>
#endif

#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
void sigint_handler(int signo) {
if (signo == SIGINT) {
@@ -225,12 +215,14 @@ void* create_function(const char* name, const char *arguments) {


void* create_code(const char* input) {
return new chatglm::CodeMessage(input);;
return new chatglm::CodeMessage(input);
}

char* get_model_type(void* pipe_pr) {
chatglm::Pipeline* pipe_p = (chatglm::Pipeline*) pipe_pr;
return strdup(to_string(pipe_p->model->config.model_type).data());
chatglm::ModelLoader loader(pipe_p->mapped_file->data, pipe_p->mapped_file->size);
loader.read_string(4);
return strdup(chatglm::to_string((chatglm::ModelType)loader.read_basic<int>()).data());
}

// copy from chatglm::TextStreamer
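The reworked get_model_type no longer reads the type from the in-memory model config (pipe_p->model->config.model_type); it re-reads the header of the mapped model file with chatglm::ModelLoader, skipping the 4-byte magic and then reading the model type id. The Go sketch below mirrors that header read directly against the file, assuming only the layout visible in the C++ above (a 4-byte magic followed by an int32 type id, little-endian on the platforms targeted here); it is illustrative and not part of the binding API.

```go
// Minimal sketch of the header read performed by get_model_type above:
// skip the 4-byte "ggml" magic, then read the numeric model type id.
package main

import (
	"encoding/binary"
	"fmt"
	"os"
)

func readModelType(path string) (int32, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	var header struct {
		Magic     [4]byte // expected to be "ggml"
		ModelType int32   // numeric model type id, as read by read_basic<int>
	}
	if err := binary.Read(f, binary.LittleEndian, &header); err != nil {
		return 0, err
	}
	return header.ModelType, nil
}

func main() {
	t, err := readModelType("chatglm3-ggml-q4_0.bin")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("model type id:", t)
}
```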
chatglm.go: 2 changes (1 addition, 1 deletion)
@@ -4,7 +4,7 @@ package chatglm
// #cgo CXXFLAGS: -I${SRCDIR}/chatglm.cpp
// #cgo CXXFLAGS: -I${SRCDIR}/chatglm.cpp/third_party/ggml/include/ggml -I${SRCDIR}/chatglm.cpp/third_party/ggml/src
// #cgo CXXFLAGS: -I${SRCDIR}/chatglm.cpp/third_party/sentencepiece/src
// #cgo LDFLAGS: -L${SRCDIR}/ -lbinding -lm -lstdc++
// #cgo LDFLAGS: -L${SRCDIR}/ -lbinding -lm -v
// #cgo darwin LDFLAGS: -framework Accelerate
// #include "binding.h"
// #include <stdlib.h>
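Because BUILD_TYPE=metal sets CGO_TAGS=-tags metal, Metal-specific link flags can live in a file guarded by a build constraint rather than in chatglm.go itself. The sketch below is an illustration only, not taken from this commit, and the framework list is an assumption about what a Metal-enabled link typically requires.

```go
//go:build metal

// Illustrative only (not from this commit): a build-constraint file compiled
// when the package is built with `-tags metal`, i.e. the CGO_TAGS value set
// by BUILD_TYPE=metal. The framework list is an assumption about a typical
// Metal-enabled link, not something defined by this repository.
package chatglm

/*
#cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit
*/
import "C"
```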
chatglm_test.go: 6 changes (4 additions, 2 deletions)
@@ -15,7 +15,7 @@ var (
func setup() {
testModelPath, exist := os.LookupEnv("TEST_MODEL")
if !exist {
testModelPath = "./chatglm3-ggml-q4_0.bin"
testModelPath = "chatglm3-ggml-q4_0.bin"
}

var err error
@@ -153,7 +153,9 @@ func TestCodeInterpreter(t *testing.T) {
if err != nil {
assert.Fail(t, "call code interpreter failed.")
}
messages = append(messages, NewAssistantMsg(ret, modelType))
msg := NewAssistantMsg(ret, modelType)
msg.ToolCalls = append(msg.ToolCalls, &ToolCallMessage{Type: TypeCode, Code: &CodeMessage{Input: "```python\ndef is_prime(n):\n \"\"\"Check if a number is prime.\"\"\"\n if n <= 1:\n return False\n if n <= 3:\n return True\n if n % 2 == 0 or n % 3 == 0:\n return False\n i = 5\n while i * i <= n:\n if n % i == 0 or n % (i + 2) == 0:\n return False\n i += 6\n return True\n\n# Get all prime numbers up to 100\nprimes_upto_100 = [i for i in range(2, 101) if is_prime(i)]\nprimes_upto_100\n```"}})
messages = append(messages, msg)
assert.Contains(t, ret, "好的,我会为您列出100以内的所有质数。\n\n质数是指只能被1和它本身整除的大于1的整数。例如,2、3、5、7等都是质数。\n\n让我们开始吧!")
messages = append(messages, NewObservationMsg("[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]"))

