Skip to content

Commit 8068970

Browse files
committed
modified uint64 bang
1 parent f8f6214 commit 8068970

File tree

2 files changed

+8
-5
lines changed

2 files changed

+8
-5
lines changed

operatorspy/tests/random_sample.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,10 @@ class RandomSampleDescriptor(Structure):
3030

3131

3232
def random_sample(data, random_val, topp, topk, voc, temperature, torch_device):
33-
indices = torch.zeros([topk], dtype = torch.int64)
33+
if(torch_device == "cuda"):
34+
indices = torch.zeros([topk], dtype = torch.uint64)
35+
else:
36+
indices = torch.zeros([topk], dtype = torch.int64)
3437
dataNp = data.clone().detach()
3538
sorted_indices = torch.arange(voc)
3639

@@ -52,7 +55,7 @@ def random_sample(data, random_val, topp, topk, voc, temperature, torch_device):
5255

5356
globalM = dataNp[0]
5457
dataNp = (dataNp - globalM) / temperature
55-
dataNp = torch.softmax(dataNp.float(), dim = 0)
58+
dataNp = torch.softmax(dataNp, dim = 0)
5659
sum_s = 0
5760
for end in range(topk):
5861
sum_s += dataNp[end]
@@ -88,15 +91,15 @@ def test(lib, handle, torch_device, voc, random_val, topp, topk, temperature, x_
8891
ans = random_sample(data.to("cpu"), random_val, topp, topk, voc, temperature, "cpu")
8992
else:
9093
ans = random_sample_0(data)
91-
if(torch_device == 'mlu' or torch_device == 'npu'):
94+
if(torch_device != "cuda"):
9295

9396
indices = torch.zeros([1], dtype = torch.int64).to(torch_device)
9497
else:
9598

9699
indices = torch.zeros([1], dtype = torch.uint64).to(torch_device)
97100
x_tensor = to_tensor(data, lib)
98101
indices_tensor = to_tensor(indices, lib)
99-
if(torch_device == 'mlu' or torch_device == 'npu'):
102+
if(torch_device == 'mlu'):
100103
indices_tensor.descriptor.contents.dt = U64 # treat int64 as uint64
101104

102105

src/ops/utils.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -224,7 +224,7 @@ inline infiniopTensorDescriptor_t dim_merge(infiniopTensorDescriptor_t desc, uin
224224
// split the dimension dim of a tensor descriptor into multiple dimensions
225225
inline infiniopTensorDescriptor_t dim_split(infiniopTensorDescriptor_t desc, uint64_t dim, const std::vector<uint64_t> &dims) {
226226
uint64_t ndim = desc->ndim;
227-
if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (uint64_t)1, std::multiplies{})) {
227+
if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (uint64_t) 1, std::multiplies{})) {
228228
return nullptr;
229229
}
230230
uint64_t new_ndim = ndim + dims.size() - 1;

0 commit comments

Comments
 (0)