Skip to content

Commit 5b25aa1

Browse files
committed
Rename ConvBiasAct to ConvAct, make bias optional for both Conv and ConvAct, add WARN, etc.
1 parent 9e71f53 commit 5b25aa1

32 files changed

+891
-669
lines changed

.github/workflows/main.yaml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,15 @@ jobs:
3333
- name: configure xmake
3434
run: xmake f --cpu=true -cv
3535

36-
- name: Build with XMake
37-
run: xmake
38-
39-
- name: Find and Set INFINI_ROOT
40-
id: set_infini_root
36+
- name: Set INFINI_ROOT
4137
run: |
42-
export INFINI_ROOT=$GITHUB_WORKSPACE
38+
export INFINI_ROOT=$GITHUB_WORKSPACE/.infini
39+
mkdir -p $INFINI_ROOT
4340
echo "INFINI_ROOT=$INFINI_ROOT" >> $GITHUB_ENV
4441
42+
- name: Build with XMake
43+
run: xmake build && xmake install
44+
4545
- name: Run Python Tests
4646
run: |
4747
GREEN='\033[0;32m'
@@ -88,3 +88,4 @@ jobs:
8888
fi
8989
env:
9090
INFINI_ROOT: ${{ env.INFINI_ROOT }}
91+

include/infini_operators.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33
#include "ops/attention/attention.h"
44
#include "ops/avg_pool/avg_pool.h"
55
#include "ops/causal_softmax/causal_softmax.h"
6-
#include "ops/global_avg_pool/global_avg_pool.h"
6+
#include "ops/conv/conv.h"
7+
#include "ops/conv_act/conv_act.h"
78
#include "ops/expand/expand.h"
89
#include "ops/gemm/gemm.h"
9-
#include "ops/conv/conv.h"
10+
#include "ops/global_avg_pool/global_avg_pool.h"
1011
#include "ops/matmul/matmul.h"
1112
#include "ops/max_pool/max_pool.h"
1213
#include "ops/mlp/mlp.h"

include/ops/conv/conv.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,15 @@ __C __export infiniopStatus_t infiniopCreateConvDescriptor(infiniopHandle_t hand
1515
infiniopTensorDescriptor_t y,
1616
infiniopTensorDescriptor_t x,
1717
infiniopTensorDescriptor_t w,
18+
infiniopTensorDescriptor_t b,
1819
uint64_t const *pads,
1920
int64_t const *strides,
2021
uint64_t const *dilations,
2122
uint64_t n);
2223

2324
__C __export infiniopStatus_t infiniopGetConvWorkspaceSize(infiniopConvDescriptor_t desc, uint64_t *size);
2425

25-
__C __export infiniopStatus_t infiniopConv(infiniopConvDescriptor_t desc, void *workspace, uint64_t workspace_size, void *y, void const *x, void const *w, void *stream);
26+
__C __export infiniopStatus_t infiniopConv(infiniopConvDescriptor_t desc, void *workspace, uint64_t workspace_size, void *y, void const *x, void const *w, void const *b, void *stream);
2627

2728
__C __export infiniopStatus_t infiniopDestroyConvDescriptor(infiniopConvDescriptor_t desc);
2829

include/ops/conv_act/conv_act.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#ifndef CONV_ACT_H
2+
#define CONV_ACT_H
3+
4+
#include "../../export.h"
5+
#include "../../operators.h"
6+
#include <cstddef>
7+
8+
/**
9+
* @brief Specifies the type of activation function
10+
*/
11+
struct ActivationMode {
12+
13+
enum Mode {
14+
// activation functions
15+
IDENTITY,
16+
RELU,
17+
SIGMOID,
18+
19+
// Count
20+
// NOTE: new activation functions should be added before "Count"
21+
Count,
22+
};
23+
constexpr static size_t numOfActivationFunctions = Mode::Count;
24+
};
25+
26+
typedef struct ConvActDescriptor {
27+
Device device;
28+
} ConvActDescriptor;
29+
30+
typedef ConvActDescriptor *infiniopConvActDescriptor_t;
31+
32+
__C __export infiniopStatus_t infiniopCreateConvActDescriptor(infiniopHandle_t handle,
33+
infiniopConvActDescriptor_t *desc_ptr,
34+
infiniopTensorDescriptor_t y,
35+
infiniopTensorDescriptor_t x,
36+
infiniopTensorDescriptor_t w,
37+
infiniopTensorDescriptor_t b,
38+
uint64_t const *pads,
39+
int64_t const *strides,
40+
uint64_t const *dilations,
41+
uint64_t n,
42+
ActivationMode::Mode activation_mode,
43+
double clip_coef = 0.0);
44+
45+
__C __export infiniopStatus_t infiniopGetConvActWorkspaceSize(infiniopConvActDescriptor_t desc, uint64_t *size);
46+
47+
__C __export infiniopStatus_t infiniopConvAct(infiniopConvActDescriptor_t desc, void *workspace, uint64_t workspace_size, void *y, void const *x, void const *w, void const *b, void *stream);
48+
49+
__C __export infiniopStatus_t infiniopDestroyConvActDescriptor(infiniopConvActDescriptor_t desc);
50+
51+
52+
#endif

include/ops/conv_bias_act/conv_bias_act.h

Lines changed: 0 additions & 33 deletions
This file was deleted.

operatorspy/tests/conv.py

Lines changed: 54 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ class ConvDescriptor(Structure):
3838
infiniopConvDescriptor_t = POINTER(ConvDescriptor)
3939

4040

41-
def conv(x, w, stride, padding, dilation):
41+
def conv(x, w, b, stride, padding, dilation):
4242
ndim = len(x.shape) - 2
4343
conv_func_map = {
4444
1: F.conv1d,
@@ -54,10 +54,10 @@ def conv(x, w, stride, padding, dilation):
5454
conv_func = conv_func_map[ndim]
5555

5656
if PROFILE:
57-
ans = conv_func(x, w, stride=stride, padding=padding, dilation=dilation)
57+
ans = conv_func(x, w, b, stride=stride, padding=padding, dilation=dilation)
5858
torch.cuda.synchronize()
5959
return ans
60-
return conv_func(x, w, stride=stride, padding=padding, dilation=dilation)
60+
return conv_func(x, w, b, stride=stride, padding=padding, dilation=dilation)
6161

6262

6363
# infer the shape of the output given the inputs for a N-ary convolution
@@ -98,31 +98,34 @@ def test(
9898
pads,
9999
strides,
100100
dilations,
101-
tensor_stride=None,
101+
add_bias,
102102
tensor_dtype=torch.float16,
103103
):
104104
assert len(pads) == len(strides) == len(dilations)
105105
print(
106-
f"Testing Conv on {torch_device} with x_shape: {x_shape}, w_shape: {w_shape}, b_shape: {w_shape[0]}, pads: {pads}, strides: {strides}, dilations: {dilations}, x_stride: {tensor_stride} dtype:{tensor_dtype}"
106+
f"Testing Conv on {torch_device} with x_shape: {x_shape}, w_shape: {w_shape}, add_bias: {add_bias}, "
107+
f"b_shape: {w_shape[0]}, pads: {pads}, strides: {strides}, dilations: {dilations}, dtype:{tensor_dtype}"
107108
)
108109
x = torch.rand(x_shape, dtype=tensor_dtype).to(torch_device)
109110
w = torch.rand(w_shape, dtype=tensor_dtype).to(torch_device)
111+
b = torch.round((torch.rand(w_shape[0], dtype=tensor_dtype).to(torch_device) * 2 - 1) * 1000) / 1000 if add_bias else None
110112
y = torch.zeros(
111113
inferShape(x.shape, w.shape, pads, strides, dilations), dtype=tensor_dtype
112114
).to(torch_device)
113115

114116
for i in range(NUM_PRERUN if PROFILE else 1):
115-
ans = conv(x, w, strides, pads, dilations)
117+
ans = conv(x, w, b, strides, pads, dilations)
116118
if PROFILE:
117119
start_time = time.time()
118120
for i in range(NUM_ITERATIONS):
119-
_ = conv(x, w, strides, pads, dilations)
121+
_ = conv(x, w, b, strides, pads, dilations)
120122
elapsed = (time.time() - start_time) / NUM_ITERATIONS
121123
print(f"pytorch time: {elapsed :6f}")
122124

123125

124126
x_tensor = to_tensor(x, lib)
125127
w_tensor = to_tensor(w, lib)
128+
b_tensor = to_tensor(b, lib) if b is not None else None
126129
y_tensor = to_tensor(y, lib)
127130
descriptor = infiniopConvDescriptor_t()
128131

@@ -133,6 +136,7 @@ def test(
133136
y_tensor.descriptor,
134137
x_tensor.descriptor,
135138
w_tensor.descriptor,
139+
b_tensor.descriptor if b_tensor else None,
136140
tuple_to_void_p(pads),
137141
tuple_to_void_p(strides),
138142
tuple_to_void_p(dilations),
@@ -147,27 +151,33 @@ def test(
147151
workspace_ptr = ctypes.cast(workspace.data_ptr(), ctypes.POINTER(ctypes.c_uint8))
148152

149153
for i in range(NUM_PRERUN if PROFILE else 1):
150-
lib.infiniopConv(
151-
descriptor,
152-
workspace_ptr,
153-
workspaceSize,
154-
y_tensor.data,
155-
x_tensor.data,
156-
w_tensor.data,
157-
None,
158-
)
159-
if PROFILE:
160-
start_time = time.time()
161-
for i in range(NUM_ITERATIONS):
154+
check_error(
162155
lib.infiniopConv(
163156
descriptor,
164157
workspace_ptr,
165158
workspaceSize,
166159
y_tensor.data,
167160
x_tensor.data,
168161
w_tensor.data,
162+
b_tensor.data if b_tensor else None,
169163
None,
170164
)
165+
)
166+
if PROFILE:
167+
start_time = time.time()
168+
for i in range(NUM_ITERATIONS):
169+
check_error(
170+
lib.infiniopConv(
171+
descriptor,
172+
workspace_ptr,
173+
workspaceSize,
174+
y_tensor.data,
175+
x_tensor.data,
176+
w_tensor.data,
177+
b_tensor.data if b_tensor else None,
178+
None,
179+
)
180+
)
171181
elapsed = (time.time() - start_time) / NUM_ITERATIONS
172182
print(f" lib time: {elapsed :6f}")
173183

@@ -181,18 +191,18 @@ def test(
181191
def test_cpu(lib, test_cases):
182192
device = DeviceEnum.DEVICE_CPU
183193
handle = create_handle(lib, device)
184-
for x_shape, w_shape, pads, strides, dilations, x_strides in test_cases:
185-
test(lib, handle, "cpu", x_shape, w_shape, pads, strides, dilations, x_strides, tensor_dtype=torch.float16)
186-
test(lib, handle, "cpu", x_shape, w_shape, pads, strides, dilations, x_strides, tensor_dtype=torch.float32)
194+
for x_shape, w_shape, pads, strides, dilations, add_bias in test_cases:
195+
test(lib, handle, "cpu", x_shape, w_shape, pads, strides, dilations, add_bias, tensor_dtype=torch.float16)
196+
test(lib, handle, "cpu", x_shape, w_shape, pads, strides, dilations, add_bias, tensor_dtype=torch.float32)
187197
destroy_handle(lib, handle)
188198

189199

190200
def test_cuda(lib, test_cases):
191201
device = DeviceEnum.DEVICE_CUDA
192202
handle = create_handle(lib, device)
193-
for x_shape, w_shape, pads, strides, dilations, x_strides in test_cases:
194-
test(lib, handle, "cuda", x_shape, w_shape, pads, strides, dilations, x_strides, tensor_dtype=torch.float16)
195-
test(lib, handle, "cuda", x_shape, w_shape, pads, strides, dilations, x_strides, tensor_dtype=torch.float32)
203+
for x_shape, w_shape, pads, strides, dilations, add_bias in test_cases:
204+
test(lib, handle, "cuda", x_shape, w_shape, pads, strides, dilations, add_bias, tensor_dtype=torch.float16)
205+
test(lib, handle, "cuda", x_shape, w_shape, pads, strides, dilations, add_bias, tensor_dtype=torch.float32)
196206
destroy_handle(lib, handle)
197207

198208

@@ -201,54 +211,62 @@ def test_bang(lib, test_cases):
201211

202212
device = DeviceEnum.DEVICE_BANG
203213
handle = create_handle(lib, device)
204-
for x_shape, w_shape, pads, strides, dilations, x_strides in test_cases:
205-
test(lib, handle, "mlu", x_shape, w_shape, pads, strides, dilations, x_strides, tensor_dtype=torch.float16)
206-
test(lib, handle, "mlu", x_shape, w_shape, pads, strides, dilations, x_strides, tensor_dtype=torch.float32)
214+
for x_shape, w_shape, pads, strides, dilations, add_bias in test_cases:
215+
test(lib, handle, "mlu", x_shape, w_shape, pads, strides, dilations, add_bias, tensor_dtype=torch.float16)
216+
test(lib, handle, "mlu", x_shape, w_shape, pads, strides, dilations, add_bias, tensor_dtype=torch.float32)
207217
destroy_handle(lib, handle)
208218

209219

210220
if __name__ == "__main__":
211221
test_cases = [
212-
# x_shape, w_shape, pads, strides, dilations, x_strides
222+
# x_shape, w_shape, pads, strides, dilations, add_bias
213223
(
214224
(32, 3, 4),
215225
(32, 3, 5),
216226
(1,),
217227
(1,),
218228
(1,),
219-
None,
229+
False,
230+
),
231+
(
232+
(3, 7, 4),
233+
(3, 7, 5),
234+
(1,),
235+
(1,),
236+
(1,),
237+
True,
220238
),
221239
(
222240
(1, 3, 4, 4),
223241
(2, 3, 3, 3),
224242
(1, 1),
225243
(1, 2),
226244
(2, 1),
227-
None,
245+
True,
228246
),
229247
(
230248
(32, 3, 128, 128),
231249
(64, 3, 5, 5),
232250
(2, 2),
233251
(2, 2),
234252
(1, 1),
235-
None,
253+
False,
236254
),
237255
(
238256
(1, 1, 4, 4, 4),
239257
(1, 1, 5, 5, 5),
240258
(1, 1, 1),
241259
(1, 1, 1),
242260
(1, 1, 1),
243-
None,
261+
True,
244262
),
245263
(
246264
(32, 3, 32, 32, 32),
247265
(64, 3, 5, 5, 5),
248266
(3, 2, 2),
249267
(4, 3, 3),
250268
(2, 2, 1),
251-
None,
269+
False,
252270
),
253271
]
254272
args = get_args()
@@ -260,6 +278,7 @@ def test_bang(lib, test_cases):
260278
infiniopTensorDescriptor_t,
261279
infiniopTensorDescriptor_t,
262280
infiniopTensorDescriptor_t,
281+
infiniopTensorDescriptor_t,
263282
c_void_p,
264283
c_void_p,
265284
c_void_p,
@@ -274,6 +293,7 @@ def test_bang(lib, test_cases):
274293
c_void_p,
275294
c_void_p,
276295
c_void_p,
296+
c_void_p,
277297
]
278298
lib.infiniopDestroyConvDescriptor.restype = c_int32
279299
lib.infiniopDestroyConvDescriptor.argtypes = [

0 commit comments

Comments
 (0)