Modified uint64 index handling for bang

xgqdut2016 · xgqdut2016 · commit 80689708d304 · 2024-12-18T14:48:14.000+08:00
diff --git a/operatorspy/tests/random_sample.py b/operatorspy/tests/random_sample.py
@@ -30,7 +30,10 @@ class RandomSampleDescriptor(Structure):
 
 
 def random_sample(data, random_val, topp, topk, voc, temperature, torch_device):
-    indices = torch.zeros([topk], dtype = torch.int64)
+    if(torch_device == "cuda"):
+        indices = torch.zeros([topk], dtype = torch.uint64)
+    else:
+        indices = torch.zeros([topk], dtype = torch.int64)
     dataNp = data.clone().detach()
     sorted_indices = torch.arange(voc)
     
@@ -52,7 +55,7 @@ def random_sample(data, random_val, topp, topk, voc, temperature, torch_device):
     
     globalM = dataNp[0]
     dataNp = (dataNp - globalM) / temperature
-    dataNp = torch.softmax(dataNp.float(), dim = 0)
+    dataNp = torch.softmax(dataNp, dim = 0)
     sum_s = 0
     for end in range(topk):
         sum_s += dataNp[end]
@@ -88,15 +91,15 @@ def test(lib, handle, torch_device, voc, random_val, topp, topk, temperature, x_
         ans = random_sample(data.to("cpu"), random_val, topp, topk, voc, temperature, "cpu")
     else:
         ans = random_sample_0(data)
-    if(torch_device == 'mlu' or torch_device == 'npu'):
+    if(torch_device != "cuda"):
         
         indices = torch.zeros([1], dtype = torch.int64).to(torch_device)
     else:
         
         indices = torch.zeros([1], dtype = torch.uint64).to(torch_device)
     x_tensor = to_tensor(data, lib)
     indices_tensor = to_tensor(indices, lib)
-    if(torch_device == 'mlu' or torch_device == 'npu'):
+    if(torch_device == 'mlu'):
         indices_tensor.descriptor.contents.dt = U64 # treat int64 as uint64
     
     
diff --git a/src/ops/utils.h b/src/ops/utils.h
@@ -224,7 +224,7 @@ inline infiniopTensorDescriptor_t dim_merge(infiniopTensorDescriptor_t desc, uin
 // split the dimension dim of a tensor descriptor into multiple dimensions
 inline infiniopTensorDescriptor_t dim_split(infiniopTensorDescriptor_t desc, uint64_t dim, const std::vector<uint64_t> &dims) {
     uint64_t ndim = desc->ndim;
-    if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (uint64_t)1, std::multiplies{})) {
+    if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (uint64_t) 1, std::multiplies{})) {
         return nullptr;
     }
     uint64_t new_ndim = ndim + dims.size() - 1;

Original file line number	Diff line number	Diff line change
`@@ -224,7 +224,7 @@ inline infiniopTensorDescriptor_t dim_merge(infiniopTensorDescriptor_t desc, uin`
`224`	`224`	`// split the dimension dim of a tensor descriptor into multiple dimensions`
`225`	`225`	`inline infiniopTensorDescriptor_t dim_split(infiniopTensorDescriptor_t desc, uint64_t dim, const std::vector<uint64_t> &dims) {`
`226`	`226`	`uint64_t ndim = desc->ndim;`
`227`		`- if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (uint64_t)1, std::multiplies{})) {`
	`227`	`+ if (desc->shape[dim] != std::accumulate(dims.begin(), dims.end(), (uint64_t) 1, std::multiplies{})) {`
`228`	`228`	`return nullptr;`
`229`	`229`	`}`
`230`	`230`	`uint64_t new_ndim = ndim + dims.size() - 1;`