
Commit d98e6a9

committed
Restore file permissions
1 parent 7a9f0a5 commit d98e6a9

39 files changed: +1898 −207 lines

include/ops/clip/clip.h

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ __C __export infiniopStatus_t infiniopCreateClipDescriptor(infiniopHandle_t hand
     infiniopTensorDescriptor_t y
 );
 
-__C __export infiniopStatus_t infiniopClip(infiniopClipDescriptor_t desc, void const *x, void *min, void *max, void *y, void *stream);
+__C __export infiniopStatus_t infiniopClip(infiniopClipDescriptor_t desc, void *x, float *min, float *max, void *y, void *stream);
 
 __C __export infiniopStatus_t infiniopDestroyClipDescriptor(infiniopClipDescriptor_t desc);
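The clip bounds are now plain host-side float pointers instead of opaque tensor data. Below is a minimal sketch, not part of the commit, of how the new signature can be driven from Python with ctypes; the library filename and the descriptor/buffer handles are placeholder assumptions, and the real harness in operatorspy/tests/clip.py goes through its own helpers instead.

# Sketch (not from the commit): binding and calling the new infiniopClip signature.
# "./libinfiniop.so" and the desc/x_ptr/y_ptr handles are illustrative placeholders.
import ctypes
from ctypes import c_float, c_int32, c_void_p

lib = ctypes.CDLL("./libinfiniop.so")  # hypothetical library path

lib.infiniopClip.restype = c_int32
lib.infiniopClip.argtypes = [
    c_void_p,  # infiniopClipDescriptor_t desc (treated as an opaque pointer here)
    c_void_p,  # void *x
    c_void_p,  # float *min, or NULL for "no lower bound"
    c_void_p,  # float *max, or NULL for "no upper bound"
    c_void_p,  # void *y
    c_void_p,  # void *stream
]

def run_clip(desc, x_ptr, y_ptr, min_val=None, max_val=None, stream=None):
    # The bounds are host floats passed by reference, not device tensor pointers.
    min_arg = ctypes.byref(c_float(min_val)) if min_val is not None else None
    max_arg = ctypes.byref(c_float(max_val)) if max_val is not None else None
    status = lib.infiniopClip(desc, x_ptr, min_arg, max_arg, y_ptr, stream)
    assert status == 0, f"infiniopClip returned status {status}"

ctypes accepts a byref(c_float(...)) result wherever the declared argtype is c_void_p, which is what lets the test script below pass None to signal an absent bound without allocating a tensor for the scalar.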

include/ops/gather/gather.h

Lines changed: 1 addition & 1 deletion
@@ -17,7 +17,7 @@ __C __export infiniopStatus_t infiniopCreateGatherDescriptor(infiniopHandle_t ha
     int64_t axis
 );
 
-__C __export infiniopStatus_t infiniopGather(infiniopGatherDescriptor_t desc, void const *x, void *indices, void *y, void *stream);
+__C __export infiniopStatus_t infiniopGather(infiniopGatherDescriptor_t desc, void *x, void *indices, void *y, void *stream);
 
 __C __export infiniopStatus_t infiniopDestroyGatherDescriptor(infiniopGatherDescriptor_t desc);

include/ops/reducemax/reducemax.h

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ __C __export infiniopStatus_t infiniopCreateReducemaxDescriptor(infiniopHandle_t
     bool noop_with_empty_axes
 );
 
-__C __export infiniopStatus_t infiniopReducemax(infiniopReducemaxDescriptor_t desc, void *y, void const *x, void const *dynamic_axes, uint64_t dynamic_axes_size, void *stream);
+__C __export infiniopStatus_t infiniopReducemax(infiniopReducemaxDescriptor_t desc, void *y, void *x, void *dynamic_axes, uint64_t dynamic_axes_size, void *stream);
 
 __C __export infiniopStatus_t infiniopDestroyReducemaxDescriptor(infiniopReducemaxDescriptor_t desc);
 #endif

include/ops/reducemean/reducemean.h

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ __C __export infiniopStatus_t infiniopCreateReducemeanDescriptor(infiniopHandle_
     bool noop_with_empty_axes
 );
 
-__C __export infiniopStatus_t infiniopReducemean(infiniopReducemeanDescriptor_t desc, void *dst, void const *src, void const *dynamic_axes, uint64_t dynamic_axes_size, void *stream);
+__C __export infiniopStatus_t infiniopReducemean(infiniopReducemeanDescriptor_t desc, void *dst, void *src, void *dynamic_axes, uint64_t dynamic_axes_size, void *stream);
 
 __C __export infiniopStatus_t infiniopDestroyReducemeanDescriptor(infiniopReducemeanDescriptor_t desc);
 #endif

include/ops/reducemin/reducemin.h

Lines changed: 1 addition & 1 deletion
@@ -19,7 +19,7 @@ __C __export infiniopStatus_t infiniopCreateReduceminDescriptor(infiniopHandle_t
     bool noop_with_empty_axes
 );
 
-__C __export infiniopStatus_t infiniopReducemin(infiniopReduceminDescriptor_t desc, void *dst, void const *src, void const *dynamic_axes, uint64_t dynamic_axes_size, void *stream);
+__C __export infiniopStatus_t infiniopReducemin(infiniopReduceminDescriptor_t desc, void *dst, void *src, void *dynamic_axes, uint64_t dynamic_axes_size, void *stream);
 
 __C __export infiniopStatus_t infiniopDestroyReduceminDescriptor(infiniopReduceminDescriptor_t desc);
 #endif
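With the const qualifiers gone, the three reduce entry points (infiniopReducemax, infiniopReducemean, infiniopReducemin) share one calling convention: destination, source, an optional dynamic-axes buffer, its element count, and a stream. Below is a minimal ctypes sketch of that shared shape, not part of the commit; the library path, the use of c_void_p for the descriptor, and the int64 element type for dynamic_axes are assumptions.

# Sketch (not from the commit): one binding pattern covering all three reduce ops.
# Library path and the int64 element type of dynamic_axes are assumptions.
import ctypes
from ctypes import c_int32, c_int64, c_uint64, c_void_p

lib = ctypes.CDLL("./libinfiniop.so")  # hypothetical library path

for name in ("infiniopReducemax", "infiniopReducemean", "infiniopReducemin"):
    fn = getattr(lib, name)
    fn.restype = c_int32
    fn.argtypes = [
        c_void_p,  # descriptor (opaque pointer)
        c_void_p,  # void *dst / *y
        c_void_p,  # void *src / *x, no longer const-qualified
        c_void_p,  # void *dynamic_axes
        c_uint64,  # uint64_t dynamic_axes_size
        c_void_p,  # void *stream
    ]

def run_reduce(name, desc, dst_ptr, src_ptr, dynamic_axes=None, stream=None):
    # The test scripts pass dynamic_axes=None and rely on the axes supplied at
    # descriptor creation; a non-empty list travels as a raw pointer plus a count.
    fn = getattr(lib, name)
    if dynamic_axes:
        axes = (c_int64 * len(dynamic_axes))(*dynamic_axes)  # assumed element type
        return fn(desc, dst_ptr, src_ptr, axes, c_uint64(len(dynamic_axes)), stream)
    return fn(desc, dst_ptr, src_ptr, None, c_uint64(0), stream)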

operatorspy/tests/clip.py

Lines changed: 48 additions & 28 deletions
@@ -1,4 +1,4 @@
-from ctypes import POINTER, Structure, c_int32, c_void_p, c_uint64, c_bool
+from ctypes import POINTER, Structure, c_int32, c_void_p, c_uint64, c_bool, c_float
 import ctypes
 import sys
 import os
@@ -21,7 +21,7 @@
 from typing import Tuple
 import numpy as np
 
-PROFILE = False
+PROFILE = True
 NUM_PRERUN = 10
 NUM_ITERATIONS = 1000
 
@@ -46,29 +46,30 @@ def test(
     x_shape,
     min,
     max,
-    tensor_dtype=torch.float16
+    tensor_dtype=torch.float32
 ):
     print(
         f"Testing clip on {torch_device} with x_shape:{x_shape} dtype:{tensor_dtype} max:{max} min:{min}"
     )
-    x = torch.randn(x_shape, dtype=tensor_dtype, device=torch_device)
-    output = torch.randn(x_shape, dtype=tensor_dtype, device=torch_device)
+    x = torch.randn(x_shape, dtype=torch.float32, device=torch_device)
+
+    output = torch.randn(x_shape, dtype=torch.float32, device=torch_device)
     if min != None:
-        min = torch.tensor(min, dtype=torch.float32, device=torch_device)
+        min_t = torch.tensor(min, dtype=torch.float32, device=torch_device)
     else:
-        min = torch.tensor(float("-inf"), dtype=torch.float32, device=torch_device)
+        min_t = torch.tensor(float("-inf"), dtype=torch.float32, device=torch_device)
     if max != None:
-        max = torch.tensor(max, dtype=torch.float32, device=torch_device)
+        max_t = torch.tensor(max, dtype=torch.float32, device=torch_device)
     else:
-        max = torch.tensor(float("inf"), dtype=torch.float32, device=torch_device)
+        max_t = torch.tensor(float("inf"), dtype=torch.float32, device=torch_device)
     for i in range(NUM_PRERUN if PROFILE else 1):
        if min == None and max == None:
            break
-        ans = clip(x, min, max)
+        ans = clip(x, min_t, max_t)
     if PROFILE:
         start_time = time.time()
         for i in range(NUM_ITERATIONS):
-            _ = clip(x, min, max)
+            _ = clip(x, min_t, max_t)
         elapsed = (time.time() - start_time) / NUM_ITERATIONS
         print(f"pytorch time: {elapsed :10f}")
     x_tensor = to_tensor(x, lib)
@@ -82,15 +83,16 @@ def test(
             y_tensor.descriptor,
         )
     )
+    #Ss = [1024, 2048, 4096]
     x_tensor.descriptor.contents.invalidate()
     y_tensor.descriptor.contents.invalidate()
     for i in range(NUM_PRERUN if PROFILE else 1):
         check_error(
             lib.infiniopClip(
                 descriptor,
                 x_tensor.data,
-                min.data_ptr() if min != None else None,
-                max.data_ptr() if max != None else None,
+                ctypes.byref(c_float(min)) if min != None else None,
+                ctypes.byref(c_float(max)) if max != None else None,
                 y_tensor.data,
                 None,
             )
@@ -102,37 +104,50 @@ def test(
                 lib.infiniopClip(
                     descriptor,
                     x_tensor.data,
-                    min.data_ptr() if min != None else None,
-                    max.data_ptr() if max != None else None,
+                    ctypes.byref(c_float(min)) if min != None else None,
+                    ctypes.byref(c_float(max)) if max != None else None,
                     y_tensor.data,
                     None,
                 )
             )
         elapsed = (time.time() - start_time) / NUM_ITERATIONS
         print(f"lib time: {elapsed :10f}")
-    print("x:", x)
-    print("custom op ans:", output)
-    print("ans:", ans) if max != None or min != None else print("ans:", x)
     assert torch.allclose(output, ans, atol=0, rtol=0) if max != None or min != None else torch.allclose(output, x, atol=0, rtol=0)
     check_error(lib.infiniopDestroyClipDescriptor(descriptor))
 
 def test_cpu(lib, test_cases):
     device = DeviceEnum.DEVICE_CPU
     handle = create_handle(lib, device)
-    for x_shape, min, max in test_cases:
-        test(lib, handle, "cpu", x_shape, min, max, tensor_dtype=torch.float16)
-        print("\n")
-        #test(lib, handle, "cpu", x_shape, axes, tensor_dtype=torch.float32)
+    for x_shape, min, max, tensor_type in test_cases:
+        test(lib, handle, "cpu", x_shape, min, max, tensor_dtype=tensor_type)
+    destroy_handle(lib, handle)
+
+def test_cuda(lib, test_cases):
+    device = DeviceEnum.DEVICE_CUDA
+    handle = create_handle(lib, device)
+    for x_shape, min, max, tensor_type in test_cases:
+        test(lib, handle, "cuda", x_shape, min, max, tensor_dtype=tensor_type)
     destroy_handle(lib, handle)
 
 
 if __name__ == "__main__":
     test_cases = [
-        ((3, 4), -1, 1),
-        ((3, 4), None, 1),
-        ((3, 4), -1, None),
-        ((3, 4), None, None)
-        # stride =
+        ((3, 4), -1, 1, torch.float32),
+        ((3, 4), None, 1, torch.float32),
+        ((3, 4), -1, None, torch.float32),
+        ((3, 4), None, None, torch.float32),
+        ((16), -1, 1, torch.float32),
+        ((1024, 1024), -1, 1, torch.float32),
+        ((4096, 4096), -1, 1, torch.float32),
+
+        ((13), -1, 1, torch.float32),
+        ((3, 4), -1, 1, torch.float16),
+        ((3, 4), None, 1, torch.float16),
+        ((3, 4), -1, None, torch.float16),
+        ((3, 4), None, None, torch.float16),
+        ((16), -1, 1, torch.float16),
+        ((1024, 1024), -1, 1, torch.float16),
+        ((4096, 4096), -1, 1, torch.float16),
     ]
     args = get_args()
     lib = open_lib()
@@ -141,6 +156,7 @@ def test_cpu(lib, test_cases):
         infiniopHandle_t,
         POINTER(infiniopClipDescriptor_t),
         infiniopTensorDescriptor_t,
+        infiniopTensorDescriptor_t
     ]
     lib.infiniopClip.restype = c_int32
     lib.infiniopClip.argtypes = [
@@ -149,8 +165,12 @@ def test_cpu(lib, test_cases):
         c_void_p,
        c_void_p,
        c_void_p,
+        c_void_p
     ]
     lib.infiniopDestroyClipDescriptor.restype = c_int32
     lib.infiniopDestroyClipDescriptor.argtypes = [infiniopClipDescriptor_t]
-    test_cpu(lib, test_cases)
+    if args.cuda:
+        test_cuda(lib, test_cases)
+    if args.cpu:
+        test_cpu(lib, test_cases)
     print("All tests passed!")

operatorspy/tests/gather.py

Lines changed: 1 addition & 1 deletion
@@ -107,7 +107,7 @@ def test(
         elapsed = (time.time() - start_time) / NUM_ITERATIONS
         print(f"lib time: {elapsed :10f}")
     print(f"pytorch ans: {ans}")
-    print(f"lib ans: {dst_tensor.data}")
+    print(f"lib ans: {dst}")
     assert torch.allclose(dst, ans, atol=0, rtol=0)
     check_error(lib.infiniopDestroyGatherDescriptor(descriptor))

operatorspy/tests/reducemax.py

Lines changed: 43 additions & 25 deletions
@@ -21,9 +21,9 @@
 from typing import Tuple
 import numpy as np
 
-PROFILE = False
+PROFILE = True
 NUM_PRERUN = 1
-NUM_ITERATIONS = 1
+NUM_ITERATIONS = 50
 
 class ReducemaxDescriptor(Structure):
     _fields_ = [("device", c_int32)]
@@ -113,7 +113,6 @@ def test(
             c_bool(noop_with_empty_axes),
         )
     )
-    print(f"op desctiptor created")
     x_tensor.descriptor.contents.invalidate()
     y_tensor.descriptor.contents.invalidate()
     for i in range(NUM_PRERUN if PROFILE else 1):
@@ -142,41 +141,57 @@ def test(
            )
         elapsed = (time.time() - start_time) / NUM_ITERATIONS
         print(f"lib time: {elapsed :10f}")
-    print(f"custom op output:{y}")
-    print(f"pytorch output:{ans}")
-    assert torch.allclose(y, ans, atol=0, rtol=1e-3)
-
+    # print(f"input : {x}")
+    # print(f"custom op output:{y}")
+    # print(f"pytorch output:{ans}")
     check_error(lib.infiniopDestroyReducemaxDescriptor(descriptor))
+    assert torch.allclose(y, ans, atol=0, rtol=1e-3)
 
 def test_cpu(lib, test_cases):
     device = DeviceEnum.DEVICE_CPU
     handle = create_handle(lib, device)
-    for x_shape, axes, noop_with_empty_axes, keepdims, dynamic_axes in test_cases:
-        print(dynamic_axes)
-        test(lib, handle, "cpu", x_shape, axes, dynamic_axes, noop_with_empty_axes, keepdims, tensor_dtype=torch.float16)
+    for x_shape, axes, noop_with_empty_axes, keepdims, dynamic_axes, tensor_dtype in test_cases:
+        test(lib, handle, "cpu", x_shape, axes, dynamic_axes, noop_with_empty_axes, keepdims, tensor_dtype=tensor_dtype)
         print("\n")
         #test(lib, handle, "cpu", x_shape, axes, tensor_dtype=torch.float32)
     destroy_handle(lib, handle)
 
+def test_cuda(lib, test_cases):
+    device = DeviceEnum.DEVICE_CUDA
+    handle = create_handle(lib, device)
+    for x_shape, axes, noop_with_empty_axes, keepdims, dynamic_axes, tensor_dtype in test_cases:
+        test(lib, handle, "cuda", x_shape, axes, dynamic_axes, noop_with_empty_axes, keepdims, tensor_dtype=tensor_dtype)
+        print("\n")
+    destroy_handle(lib, handle)
 
 if __name__ == "__main__":
     test_cases = [
         # dynamic calc test eg
-        ((2, 3, 4, 5), [0, 2], False, True, None),
-        ((2, 3, 4, 5), [0, 2], False, True, None),
-        #(input_shape, axis, noop_with_empty_axes, keepdims, dynamic_axes)
-        ((2, 10, 24, 10), [0, 2], False, True, None),
-        # stride =
-        ((2, 10, 24, 10), [0, 1], False, True, None),
-        ((2, 10, 24, 10), [2, 3], False , True, None),
-        ((2, 10, 24, 10), [0, 1, 2, 3], False, True, None),
-        # validate attribute noop_with_empty_axes and keepdims
-        ((2, 10, 24, 10), None, True, True, None),
-        ((2, 10, 24, 10), None, True, False, None),
-        ((2, 10, 24, 10), None, False, True, None),
-        ((2, 10, 24, 10), None, False, False, None),
-        ((2, 3, 4), [0, 1], False, False, None),
+        # ((2, 3, 4, 5), [0, 2], False, True, None),
+        # ((2, 3, 4, 5), [0, 2], False, True, None),
+        # #(input_shape, axis, noop_with_empty_axes, keepdims, dynamic_axes)
+        # ((2, 10, 24, 10), [0, 2], False, True, None),
+        # # stride =
+        # ((2, 10, 24, 10), [0, 1], False, True, None),
+        # ((2, 10, 24, 10), [2, 3], False , True, None),
+        # ((2, 10, 24, 10), [0, 1, 2, 3], False, True, None),
+        # # validate attribute noop_with_empty_axes and keepdims
+        # ((2, 10, 24, 10), None, True, True, None),
+        # ((2, 10, 24, 10), None, True, False, None),
+        # ((2, 10, 24, 10), None, False, True, None),
+        # ((2, 10, 24, 10), None, False, False, None),
+        # ((2, 3, 4), [0, 1], False, False, None),
         #((2, 10, 24, 10), [], True),
+        #((4,), [0], False, False, None, torch.float32),
+        ((1000, 300), [0, 1], False, False, None, torch.float16),
+        ((50, 3), [0, 1], False, False, None, torch.float16),
+        ((1000, 300), [0, 1], False, False, None, torch.float16),
+        ((2000, 200, 50), [0, 1], False, True, None, torch.float32),
+        ((1000, 200, 500), [0, 1], False, True, None, torch.float16),
+        ((1000, 200, 50), [0, 1], False, True, None, torch.float32),
+        ((20, 3, 4, 5), [0, 2], False, False, None, torch.float32),
+        ((20, 30, 40, 5), [0, 2, 3], False, False, None, torch.float32),
+        ((200, 3, 40, 5), [0, 3], False, False, None, torch.float32),
     ]
     args = get_args()
     lib = open_lib()
@@ -202,5 +217,8 @@ def test_cpu(lib, test_cases):
     ]
     lib.infiniopDestroyReducemaxDescriptor.restype = c_int32
     lib.infiniopDestroyReducemaxDescriptor.argtypes = [infiniopReducemaxDescriptor_t]
-    test_cpu(lib, test_cases)
+    if args.cpu:
+        test_cpu(lib, test_cases)
+    if args.cuda:
+        test_cuda(lib, test_cases)
     print("All tests passed!")
