Skip to content

Commit d91d89c

Browse files
authored
Merge pull request #87 from InfiniTensor/dev-mlu-runtime
Dev mlu runtime
2 parents 605c4c0 + b3b7d09 commit d91d89c

File tree

14 files changed

+203
-12
lines changed

14 files changed

+203
-12
lines changed

CMakeLists.txt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ message(STATUS "Project " ${PROJECT_NAME} " version " ${PROJECT_VERSION})
55
option(ABSL_PROPAGATE_CXX_STD "Abseil need this option" ON)
66
option(USE_CUDA "Support Nvidia GPU" OFF)
77
option(USE_KUNLUN "Support Baidu Kunlunxin" OFF)
8+
option(USE_BANG "Support Hanwuji MLU" OFF)
89

910
set(CMAKE_CXX_STANDARD 20)
1011
set(CMAKE_CXX_STANDARD_REQUIRED ON)
@@ -41,6 +42,38 @@ if(USE_KUNLUN)
4142
message(STATUS "KUNLUN_HOME: ${KUNLUN_HOME}")
4243
endif()
4344

45+
if (USE_BANG)
46+
add_compile_definitions(USE_BANG)
47+
include_directories(src/kernels/mlu/include)
48+
49+
# Neuware Environment
50+
if ((NOT DEFINED NEUWARE_HOME) AND (NOT DEFINED ENV{NEUWARE_HOME}))
51+
message(FATAL_ERROR "NEUWARE_HOME is not defined from cmake or env")
52+
elseif (DEFINED NEUWARE_HOME)
53+
set(NEUWARE_HOME ${NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development")
54+
else()
55+
set(NEUWARE_HOME $ENV{NEUWARE_HOME} CACHE STRING "NEUWARE_HOME directory for Cambricon Neuware development")
56+
endif()
57+
message(STATUS "NEUWARE_HOME: ${NEUWARE_HOME}")
58+
59+
# cnrt cndrv cnnl
60+
include_directories("${NEUWARE_HOME}/include")
61+
find_library(CAMBRICON_CNNL libcnnl.so "${NEUWARE_HOME}/lib64")
62+
find_library(CAMBRICON_CNRT libcnrt.so "${NEUWARE_HOME}/lib64")
63+
find_library(CAMBRICON_CNDRV libcndrv.so "${NEUWARE_HOME}/lib64")
64+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -lstdc++ -Wall")
65+
66+
if ((NOT DEFINED TARGET_CPU_ARCH) AND (NOT DEFINED ENV{TARGET_CPU_ARCH}))
67+
execute_process(COMMAND uname -m OUTPUT_VARIABLE _uname_m OUTPUT_STRIP_TRAILING_WHITESPACE)
68+
set(TARGET_CPU_ARCH "${_uname_m}" CACHE STRING "Target CPU ARCH")
69+
elseif(DEFINED TARGET_CPU_ARCH)
70+
set(TARGET_CPU_ARCH ${TARGET_CPU_ARCH} CACHE STRING "Target CPU ARCH")
71+
else()
72+
set(TARGET_CPU_ARCH $ENV{TARGET_CPU_ARCH} CACHE STRING "Target CPU ARCH")
73+
endif()
74+
message(STATUS "TARGET_CPU_ARCH: ${TARGET_CPU_ARCH}")
75+
endif()
76+
4477
add_compile_options(-march=native) # this may cause errors on some machines
4578
add_compile_options(-mtune=native)
4679
add_compile_options(-Wall)

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
TYPE ?= Debug
44
CUDA ?= OFF
55
KUNLUN ?= OFF
6+
BANG ?= OFF
67

78
CMAKE_EXTRA =
89
# CMAKE_EXTRA += -DCMAKE_CXX_COMPILER=
910

1011
build:
1112
mkdir -p build
12-
cmake -Bbuild -DCMAKE_BUILD_TYPE=$(TYPE) -DUSE_CUDA=$(CUDA) -DUSE_KUNLUN=$(KUNLUN) $(CMAKE_EXTRA)
13+
cmake -Bbuild -DCMAKE_BUILD_TYPE=$(TYPE) -DUSE_CUDA=$(CUDA) -DUSE_KUNLUN=$(KUNLUN) -DUSE_BANG=$(BANG) $(CMAKE_EXTRA)
1314
make -j -C build
1415

1516
install-python: build

src/02hardware/include/hardware/device.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ namespace refactor::hardware {
1111
// Identifies the kind of hardware a device object represents.
// The numeric values are the positions the enumerators had when listed
// without initializers; they are written out so the mapping is visible.
enum class Type : int32_t {
    Cpu = 0,
    Nvidia = 1,
    Mlu = 2,
    Kunlun = 3,
};
1517

1618
protected:
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
#ifndef HARDWARE_DEVICES_MLU_H
2+
#define HARDWARE_DEVICES_MLU_H
3+
4+
#include "../device.h"
5+
6+
namespace refactor::hardware {
7+
8+
class Mlu final : public Device {
9+
public:
10+
explicit Mlu(int32_t card);
11+
void setContext() const noexcept final;
12+
Type type() const noexcept final {
13+
return Type::Mlu;
14+
}
15+
};
16+
17+
}// namespace refactor::hardware
18+
19+
#endif// HARDWARE_DEVICES_MLU_H

src/02hardware/src/device_manager.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "hardware/device_manager.h"
22
#include "hardware/devices/cpu.h"
3+
#include "hardware/devices/mlu.h"
34
#include "hardware/devices/nvidia.h"
45

56
namespace refactor::hardware::device {
@@ -37,6 +38,7 @@ namespace refactor::hardware::device {
3738
using T = Device::Type;
3839
// clang-format off
3940
auto device = type == T::Nvidia ? std::make_shared<Nvidia>(card)
41+
: type == T::Mlu ? std::make_shared<Mlu>(card)
4042
: UNREACHABLEX(Arc<Device>, "");
4143
// clang-format on
4244
auto [kind, ok] = DEVICES.try_emplace(static_cast<int32_t>(type));

src/02hardware/src/devices/cpu/memory.cc

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,19 @@
55
namespace refactor::hardware {
66
using M = CpuMemory;
77

8-
void *M::malloc(size_t size) noexcept {
8+
void *M::malloc(size_t size) {
99
return std::malloc(size);
1010
}
11-
void M::free(void *ptr) noexcept {
11+
void M::free(void *ptr) {
1212
std::free(ptr);
1313
}
14-
void *M::copyHD(void *dst, void const *src, size_t bytes) const noexcept {
14+
void *M::copyHD(void *dst, void const *src, size_t bytes) const {
1515
return std::memcpy(dst, src, bytes);
1616
}
17-
void *M::copyDH(void *dst, void const *src, size_t bytes) const noexcept {
17+
void *M::copyDH(void *dst, void const *src, size_t bytes) const {
1818
return std::memcpy(dst, src, bytes);
1919
}
20-
void *M::copyDD(void *dst, void const *src, size_t bytes) const noexcept {
20+
void *M::copyDD(void *dst, void const *src, size_t bytes) const {
2121
return std::memcpy(dst, src, bytes);
2222
}
2323

src/02hardware/src/devices/cpu/memory.hh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
namespace refactor::hardware {
77

88
class CpuMemory final : public Memory {
9-
void *malloc(size_t) noexcept final;
10-
void free(void *) noexcept final;
11-
void *copyHD(void *dst, void const *src, size_t bytes) const noexcept final;
12-
void *copyDH(void *dst, void const *src, size_t bytes) const noexcept final;
13-
void *copyDD(void *dst, void const *src, size_t bytes) const noexcept final;
9+
void *malloc(size_t) final;
10+
void free(void *) final;
11+
void *copyHD(void *dst, void const *src, size_t bytes) const final;
12+
void *copyDH(void *dst, void const *src, size_t bytes) const final;
13+
void *copyDD(void *dst, void const *src, size_t bytes) const final;
1414
};
1515

1616
}// namespace refactor::hardware
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#include "functions.hh"
2+
#include "hardware/devices/mlu.h"
3+
#include "hardware/mem_pool.h"
4+
#include "memory.hh"
5+
6+
namespace refactor::hardware {
7+
8+
// Creates the memory object handed to the `Device` base class for MLU `card`.
//
// With USE_BANG: validates `card` against the device count, selects the card,
// queries its free/total memory and wraps a `MluMemory` in a `MemPool`.
// The pool size is the larger of 5 GiB and 4/5 of the total memory, capped at
// the amount currently free; the chosen size is printed once.
// Without USE_BANG: returns a null handle.
static Arc<Memory> bangMemory(int32_t card) {
#ifdef USE_BANG
    ASSERT(0 <= card && card < getDeviceCount(), "Invalid card id: {}", card);
    setDevice(card);

    auto const info = getMemInfo();
    auto const wanted = std::max(5ul << 30, info.total * 4 / 5);
    auto const size = std::min(info.free, wanted);
    fmt::println("initializing Cambricon MLU {}, memory {} / {}, alloc {}",
                 card, info.free, info.total, size);

    // NOTE(review): 256ul is presumably the pool's alignment — confirm against MemPool.
    return std::make_shared<MemPool>(std::make_shared<MluMemory>(), size, 256ul);
#else
    return nullptr;
#endif
}
24+
25+
// Builds the device for `card`; the base class receives the memory object
// produced by `bangMemory(card)`.
Mlu::Mlu(int32_t card)
    : Device(card, bangMemory(card)) {
}
26+
27+
// Selects this card as the active MLU by forwarding `_card` to `setDevice`.
//
// `setDevice` is only defined when the project is built with USE_BANG, so the
// call is compiled out otherwise; without this guard a build lacking USE_BANG
// would reference an undefined symbol. In such builds the function does
// nothing, in line with `bangMemory`, which returns a null handle then.
//
// NOTE(review): this function is `noexcept`, while `setDevice` reports runtime
// failures through BANG_ASSERT. If that raises an exception it would terminate
// the process here — confirm this is the intended policy.
void Mlu::setContext() const noexcept {
#ifdef USE_BANG
    setDevice(_card);
#endif
}
30+
31+
}// namespace refactor::hardware
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#include "functions.hh"
2+
3+
namespace refactor::hardware {
4+
5+
#ifdef USE_BANG
6+
// Asks the runtime for the number of devices via `cnrtGetDeviceCount` and
// returns it converted to `int`.
// BANG_ASSERT reports any status other than CNRT_RET_SUCCESS through
// RUNTIME_ERROR. Its argument text is embedded in that message, so the local
// name below is part of the runtime output.
int getDeviceCount() {
7+
unsigned deviceCount;
8+
BANG_ASSERT(cnrtGetDeviceCount(&deviceCount));
9+
return static_cast<int>(deviceCount);
10+
}
11+
// Selects `device` through `cnrtSetDevice`.
// BANG_ASSERT reports any status other than CNRT_RET_SUCCESS through
// RUNTIME_ERROR. Its argument text is embedded in that message, so the call
// expression must stay exactly as written.
void setDevice(int device) {
12+
BANG_ASSERT(cnrtSetDevice(device));
13+
}
14+
// Fills a `MemInfo` with the free and total memory reported by
// `cnrtMemGetInfo` and returns it by value.
// NOTE(review): presumably this refers to the device chosen by the latest
// `setDevice` call — confirm against the CNRT documentation.
MemInfo getMemInfo() {
15+
MemInfo memInfo;
16+
BANG_ASSERT(cnrtMemGetInfo(&memInfo.free, &memInfo.total));
17+
return memInfo;
18+
}
19+
#endif
20+
21+
}// namespace refactor::hardware
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#ifndef HARDWARE_DEVICES_MLU_FUNCTIONS_CUH
2+
#define HARDWARE_DEVICES_MLU_FUNCTIONS_CUH
3+
4+
#include "common.h"
5+
6+
#ifdef USE_BANG
7+
#include "cnrt.h"
8+
9+
// Evaluates the runtime call STATUS exactly once and, when the result is not
// CNRT_RET_SUCCESS, reports it through RUNTIME_ERROR together with the source
// text of the call, the runtime's error string and the numeric code.
//
// The body is wrapped in `do { ... } while (0)` so that `BANG_ASSERT(x);`
// behaves as a single statement (safe inside an unbraced `if`/`else`).
// The temporary uses a dedicated name so an argument that itself mentions a
// variable called `status` is not shadowed. Callers must end the invocation
// with a semicolon.
#define BANG_ASSERT(STATUS)                                                                \
    do {                                                                                   \
        if (auto bangAssertStatus_ = (STATUS); bangAssertStatus_ != CNRT_RET_SUCCESS) {    \
            RUNTIME_ERROR(fmt::format("bang failed on \"" #STATUS "\" with \"{}\" ({})",   \
                                      cnrtGetErrorStr(bangAssertStatus_),                  \
                                      static_cast<int>(bangAssertStatus_)));               \
        }                                                                                  \
    } while (0)
14+
#endif
15+
16+
namespace refactor::hardware {
17+
18+
// Pair of memory figures returned by `getMemInfo`.
// NOTE(review): presumably both values are byte counts — confirm against the
// runtime's documentation.
struct MemInfo {
    size_t free;  // amount currently available
    size_t total; // overall amount
};
21+
22+
// Thin wrappers over the Cambricon runtime. They are declared unconditionally,
// but their definitions are compiled only when USE_BANG is defined.

// Number of devices reported by the runtime.
int getDeviceCount();
23+
// Selects the device with index `device`.
void setDevice(int device);
24+
// Free and total memory as reported by the runtime.
MemInfo getMemInfo();
25+
26+
}// namespace refactor::hardware
27+
28+
#endif// HARDWARE_DEVICES_MLU_FUNCTIONS_CUH

0 commit comments

Comments
 (0)