diff --git a/include/core/allocator.h b/include/core/allocator.h
index 002601d..12c06b2 100644
--- a/include/core/allocator.h
+++ b/include/core/allocator.h
@@ -27,6 +27,7 @@ namespace infini
         // TODO: a data structure may be needed to store the free blocks, to ease management and merging
         // HINT: a map can store the free blocks, keyed by each block's start/end address, with the block size as the value
         // =================================== Homework ===================================
+        std::map<size_t, size_t> free_block;

     public:
         Allocator(Runtime runtime);
diff --git a/my_test/Makefile b/my_test/Makefile
new file mode 100644
index 0000000..1e83905
--- /dev/null
+++ b/my_test/Makefile
@@ -0,0 +1,11 @@
+FUZDIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+
+test: test.o
+	$(FUZDIR)/test.o
+
+test.o:
+	echo $(FUZDIR)
+	g++ $(FUZDIR)/test.cpp -o $(FUZDIR)/test.o
+
+clean:
+	rm $(FUZDIR)/test.o
\ No newline at end of file
diff --git a/my_test/test.cpp b/my_test/test.cpp
new file mode 100644
index 0000000..9c1b3c1
--- /dev/null
+++ b/my_test/test.cpp
@@ -0,0 +1,10 @@
+#include <iostream>
+#include <map>
+
+int main(){
+    std::map<int, int> free_block{{0, 20}, {123, 40}, {200, 100}};
+    int addr = 40;
+    auto next = free_block.lower_bound(addr);
+    std::cout << std::prev(next)->first << std::endl;
+    return 0;
+}
\ No newline at end of file
diff --git a/src/core/allocator.cc b/src/core/allocator.cc
index ff593ae..3cd5785 100644
--- a/src/core/allocator.cc
+++ b/src/core/allocator.cc
@@ -5,14 +5,16 @@ namespace infini
     Allocator::Allocator(Runtime runtime) : runtime(runtime)
    {
-        used = 0;
-        peak = 0;
-        ptr = nullptr;
+        used = 0;      // number of bytes currently in use
+        peak = 0;      // peak memory usage
+        ptr = nullptr; // start pointer of the one big memory block actually allocated

         // 'alignment' defaults to sizeof(uint64_t), because it is the length of
         // the longest data type currently supported by the DataType field of
         // the tensor
         alignment = sizeof(uint64_t);
+
+        free_block[0] = 1024 * 1024 * 1024; // start with a single 1 GiB free block
     }

     Allocator::~Allocator()
@@ -32,8 +34,26 @@ namespace infini
         // =================================== Homework ===================================
         // TODO: design an algorithm to allocate memory and return the offset of the start address
         // =================================== Homework ===================================
+        // first fit: walk the free blocks in address order
+        for (auto it = free_block.begin(); it != free_block.end(); ++it) {
+            size_t addr = it->first;
+            size_t blk_size = it->second;
+            if (blk_size >= size) {
+                free_block.erase(it);
+                if (blk_size > size) {
+                    // keep the tail of the block as a smaller free block
+                    free_block[addr + size] = blk_size - size;
+                }
+                used += size;
+                peak = std::max(peak, used);
+                return addr;
+            }
+        }

-        return 0;
+        // no free block fits: allocate fresh space starting at `used`
+        size_t addr = used;
+        used += size;
+        peak = std::max(peak, used);
+
+        return addr;
     }

     void Allocator::free(size_t addr, size_t size)
@@ -44,6 +64,30 @@ namespace infini
         // =================================== Homework ===================================
         // TODO: design an algorithm to reclaim memory
         // =================================== Homework ===================================
+        auto next = free_block.lower_bound(addr); // first free block starting at or after addr
+        auto prev = next == free_block.begin() ? free_block.end() : std::prev(next);
+
+        bool merge_prev = (prev != free_block.end() && prev->first + prev->second == addr);
+        bool merge_next = (next != free_block.end() && addr + size == next->first);
+
+        if (merge_prev && merge_next) {
+            // merge with both neighbours
+            prev->second += size + next->second;
+            free_block.erase(next);
+        } else if (merge_prev) {
+            // merge with the preceding block only
+            prev->second += size;
+        } else if (merge_next) {
+            // merge with the following block only
+            free_block[addr] = size + next->second;
+            free_block.erase(next);
+        } else {
+            // nothing adjacent: insert a new free block
+            free_block[addr] = size;
+        }
+
+        used -= size;
     }

     void *Allocator::getPtr()
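As a cross-check of the hint in allocator.h, the map-based free list can be exercised in isolation. Below is a minimal standalone sketch of the same first-fit alloc / coalescing free scheme as src/core/allocator.cc; the `FreeList` name and the 1024-byte pool are illustrative only, not part of the patch.

    #include <cassert>
    #include <cstddef>
    #include <map>

    // Minimal model of the allocator: key = start offset of a free block,
    // value = its size in bytes.
    struct FreeList {
        std::map<size_t, size_t> blocks;

        size_t alloc(size_t size) {
            // first fit: walk the blocks in address order
            for (auto it = blocks.begin(); it != blocks.end(); ++it) {
                if (it->second >= size) {
                    size_t addr = it->first, remain = it->second - size;
                    blocks.erase(it);
                    if (remain > 0)
                        blocks[addr + size] = remain; // keep the tail as a free block
                    return addr;
                }
            }
            assert(false && "out of memory");
            return 0;
        }

        void free(size_t addr, size_t size) {
            auto next = blocks.lower_bound(addr); // first block starting at or after addr
            auto prev = next == blocks.begin() ? blocks.end() : std::prev(next);
            bool mergePrev = prev != blocks.end() && prev->first + prev->second == addr;
            bool mergeNext = next != blocks.end() && addr + size == next->first;
            if (mergePrev && mergeNext) {
                prev->second += size + next->second;
                blocks.erase(next);
            } else if (mergePrev) {
                prev->second += size;
            } else if (mergeNext) {
                blocks[addr] = size + next->second;
                blocks.erase(next);
            } else {
                blocks[addr] = size;
            }
        }
    };

    int main() {
        FreeList fl;
        fl.blocks[0] = 1024;      // one big initial block
        size_t a = fl.alloc(128); // a == 0,   free list: {128: 896}
        size_t b = fl.alloc(64);  // b == 128, free list: {192: 832}
        fl.free(a, 128);          // free list: {0: 128, 192: 832}
        fl.free(b, 64);           // merges with both neighbours: {0: 1024}
        assert(fl.blocks.size() == 1 && fl.blocks[0] == 1024);
        return 0;
    }

The final assert confirms that freeing the second block coalesces with both neighbours, restoring the single initial block.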
diff --git a/src/core/graph.cc b/src/core/graph.cc
index 3a90637..7703cd2 100644
--- a/src/core/graph.cc
+++ b/src/core/graph.cc
@@ -1,4 +1,6 @@
 #include "core/graph.h"
+#include "operators/matmul.h"
+#include "operators/transpose.h"
 #include <algorithm>
 #include <numeric>
 #include <queue>
@@ -38,6 +40,7 @@ namespace infini
     string GraphObj::toString() const
     {
+        std::cout << "Call toString\n";
         std::ostringstream oss;
         oss << "Graph Tensors:\n";
         for (const auto &tensor : tensors)
@@ -56,6 +59,7 @@ namespace infini
             oss << ", succ " << vecToString(succs);
             oss << ", " << op << "\n";
         }
+        std::cout << "end\n";
         return oss.str();
     }
@@ -106,6 +110,110 @@ namespace infini
         // =================================== Homework ===================================
         // 1. Remove redundant operators (e.g. two adjacent transpose operators that perform inverse permutations can both be deleted)
         // 2. Fuse operators (e.g. matmul carries transA/transB attributes; if one of its inputs comes from a transpose that swaps the last two dimensions, the transpose can be folded into those attributes)
         // =================================== Homework ===================================
+        const OpVec &operators = getOperators();
+
+        std::set<std::shared_ptr<OperatorObj>> opsToRemove;
+        std::set<std::shared_ptr<TensorObj>> tensorsToRemove;
+
+        // pending (old tensor, new tensor) pairs whose consumers must be rewired
+        std::vector<std::pair<std::shared_ptr<TensorObj>, std::shared_ptr<TensorObj>>> toReplace;
+        for (auto &op : operators) {
+            if (opsToRemove.count(op)) continue;
+
+            auto outTensor = op->getOutput();
+            if (!outTensor) continue;
+            auto targets = outTensor->getTargets();
+            if (targets.empty()) continue;
+
+            auto targetOp = targets[0]; // only the first successor is handled
+
+            // === 1. adjacent transposes that undo each other (10: Transpose)
+            if (op->getOpType().underlying() == 10 && targetOp->getOpType().underlying() == 10) {
+                auto t1 = std::dynamic_pointer_cast<TransposeObj>(op);
+                auto t2 = std::dynamic_pointer_cast<TransposeObj>(targetOp);
+                if (!t1 || !t2) continue;
+
+                const auto &perm1 = t1->getPermute();
+                const auto &perm2 = t2->getPermute();
+                if (perm1.size() != perm2.size()) continue;
+
+                // the two permutations cancel iff perm2[perm1[i]] == i for every axis
+                bool inverse = true;
+                for (size_t i = 0; i < perm1.size(); ++i) {
+                    if ((size_t)perm2[perm1[i]] != i) {
+                        inverse = false;
+                        break;
+                    }
+                }
+
+                if (inverse) {
+                    opsToRemove.insert(t1);
+                    opsToRemove.insert(t2);
+                    tensorsToRemove.insert(outTensor);
+                    tensorsToRemove.insert(targetOp->getOutput());
+
+                    toReplace.emplace_back(t2->getOutput(), t1->getInputs(0));
+                }
+            }

+            // === 2. fold a transpose of the last two dimensions into matmul (7: MatMul)
+            if (op->getOpType().underlying() == 10 && targetOp->getOpType().underlying() == 7) {
+                auto trans = std::dynamic_pointer_cast<TransposeObj>(op);
+                auto matmul = std::dynamic_pointer_cast<MatmulObj>(targetOp);
+                if (!trans || !matmul) continue;
+
+                const auto &perm = trans->getPermute();
+                size_t sz = perm.size();
+                if (sz >= 2 && perm[sz - 1] == static_cast<int>(sz - 2) &&
+                    perm[sz - 2] == static_cast<int>(sz - 1)) {
+                    if (matmul->getInputs(0) == trans->getOutput()) {
+                        matmul->setTransA(true);
+                        toReplace.emplace_back(trans->getOutput(), trans->getInputs(0));
+                    } else if (matmul->getInputs(1) == trans->getOutput()) {
+                        matmul->setTransB(true);
+                        toReplace.emplace_back(trans->getOutput(), trans->getInputs(0));
+                    }
+
+                    opsToRemove.insert(trans);
+                    tensorsToRemove.insert(trans->getOutput());
+                }
+            }
+        }
+
+        // === rewire the connections
+        for (auto &pair : toReplace) {
+            auto &oldT = pair.first;
+            auto &newT = pair.second;
+
+            // iterate over a copy: replaceInput/removeTarget mutate the target list
+            std::vector<std::shared_ptr<OperatorObj>> targetOps = oldT->getTargets();
+
+            for (auto &op : targetOps) {
+                op->replaceInput(oldT, newT);
+                oldT->removeTarget(op);
+                newT->addTarget(op);
+            }
+        }
+
+        // === delete the operators
+        for (auto &op : opsToRemove) {
+            auto out = op->getOutput();
+            if (out && out->getSource() == op) {
+                out->setSource(nullptr);
+            }
+            removeOperator(op);
+        }
+
+        // === delete the tensors
+        for (auto &tensor : tensorsToRemove) {
+            removeTensor(tensor);
+        }
     }

     Tensor GraphObj::getTensor(int fuid) const
@@ -152,6 +260,31 @@ namespace infini
         // TODO: use the allocator to allocate memory for the computation graph
         // HINT: after obtaining the allocated pointer, call the tensor's setDataBlob function to bind the memory to the tensor
         // =================================== Homework ===================================
+        size_t all_size = 0;
+        std::vector<std::pair<std::shared_ptr<TensorObj>, size_t>> tensor_sizes;
+        for (auto &tensor : tensors) {
+            size_t tensor_size = tensor->getBytes(); // size of the tensor in bytes
+            all_size += tensor_size;
+            tensor_sizes.emplace_back(tensor, tensor_size);
+        }
+
+        // allocate all of the memory at once
+        size_t base_addr = allocator.alloc(all_size);
+        void *base_ptr = allocator.getPtr();
+
+        // hand each tensor its slice of the allocation and bind it
+        size_t current_offset = 0;
+        for (auto &[tensor, size] : tensor_sizes) {
+            // this tensor's address: pool base + offset returned by alloc + running offset
+            void *tensor_data_ptr = static_cast<char *>(base_ptr) + base_addr + current_offset;
+
+            // create a blob and bind it to the tensor
+            Blob blob = Ref<BlobObj>(new BlobObj(runtime, tensor_data_ptr));
+            tensor->setDataBlob(blob);
+
+            current_offset += size;
+        }

         allocator.info();
     }
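The redundancy test in rule 1 hinges on a single identity: applying perm1 and then perm2 is a no-op exactly when perm2[perm1[i]] == i for every axis (the check is symmetric, so the order of the two transposes does not matter). A self-contained sketch of just that check; the `isInverse` helper is illustrative, not an API of the repo.

    #include <cassert>
    #include <vector>

    // Two permutations cancel out exactly when composing them yields the
    // identity: perm2[perm1[i]] == i for every axis i.
    bool isInverse(const std::vector<int> &perm1, const std::vector<int> &perm2) {
        if (perm1.size() != perm2.size())
            return false;
        for (size_t i = 0; i < perm1.size(); ++i)
            if ((size_t)perm2[perm1[i]] != i)
                return false;
        return true;
    }

    int main() {
        assert(isInverse({0, 2, 1}, {0, 2, 1}));  // swapping the last two axes is self-inverse
        assert(isInverse({2, 0, 1}, {1, 2, 0}));  // a 3-cycle and its inverse
        assert(!isInverse({2, 0, 1}, {2, 0, 1})); // a 3-cycle is not self-inverse
        return 0;
    }

The first case is the one rule 2 relies on as well: a swap of the last two axes is its own inverse, which is why it can be absorbed into matmul's transA/transB flags.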
diff --git a/src/core/runtime.cc b/src/core/runtime.cc
index bd88d90..797d126 100644
--- a/src/core/runtime.cc
+++ b/src/core/runtime.cc
@@ -2,7 +2,6 @@
 #include "core/blob.h"
 #include "core/kernel.h"
 #include "core/graph.h"
-#include "core/kernel.h"
 #include <chrono>
 #include <cstring>
 #include <memory>
diff --git a/src/operators/concat.cc b/src/operators/concat.cc
index d196330..439eb39 100644
--- a/src/operators/concat.cc
+++ b/src/operators/concat.cc
@@ -17,6 +17,12 @@ optional<vector<Shape>> ConcatObj::inferShape(const TensorVec &inputs) {
     // TODO: set dims to the correct shape after the concat
     // REF: https://onnx.ai/onnx/operators/onnx__Concat.html#concat-13
     // =================================== Homework ===================================
+    for (size_t i = 1; i < inputs.size(); ++i) {
+        const auto &input_dims = inputs[i]->getDims();
+
+        // accumulate the sizes along the concat dimension
+        dims[dim] += input_dims[dim];
+    }

     return {{dims}};
 }
diff --git a/src/operators/matmul.cc b/src/operators/matmul.cc
index 7a16ca2..aa1042e 100644
--- a/src/operators/matmul.cc
+++ b/src/operators/matmul.cc
@@ -27,7 +27,35 @@ namespace infini
         // TODO: return the shape after the matmul operation
         // REF: https://github.com/onnx/onnx/blob/main/docs/Operators.md#gemm
         // =================================== Homework ===================================
-        return std::nullopt;
+        Tensor A = inputs[0];
+        Tensor B = inputs[1];
+
+        int rankA = A->getRank();
+        int rankB = B->getRank();
+
+        _IT_ASSERT_2(rankA >= 2 && rankB >= 2, "Inputs of a matrix multiplication must have rank of at least 2");
+
+        // broadcast the leading (batch) dimensions elementwise
+        Shape outputshape(rankA);
+        for (int i = 0; i < rankA - 2; i++) {
+            int dimA = A->getDims()[i];
+            int dimB = B->getDims()[i];
+            _IT_ASSERT_2((dimA == dimB || dimA == 1 || dimB == 1), "Dimension does not meet broadcast requirements");
+            outputshape[i] = std::max(dimA, dimB);
+        }
+
+        // rows/columns of A as used in the multiplication
+        int A_M = transA ? A->getDims()[rankA - 1] : A->getDims()[rankA - 2];
+        int A_N = transA ? A->getDims()[rankA - 2] : A->getDims()[rankA - 1];
+        // rows/columns of B as used in the multiplication
+        int B_M = transB ? B->getDims()[rankB - 1] : B->getDims()[rankB - 2];
+        int B_N = transB ? B->getDims()[rankB - 2] : B->getDims()[rankB - 1];
+
+        _IT_ASSERT_2(A_N == B_M, "Matrix multiplication dimensions do not match");
+
+        outputshape[rankA - 2] = A_M;
+        outputshape[rankA - 1] = B_N;
+
+        return std::vector<Shape>{outputshape};
     }
 } // namespace infini
\ No newline at end of file
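As a sanity check of the shape rule implemented above: batch dimensions broadcast elementwise, the last two follow (m, k) x (k, n) -> (m, n), and transA/transB swap the corresponding pair first. Below is a standalone model under the same equal-rank assumption the patch makes; `matmulShape` is illustrative, not repo code.

    #include <algorithm>
    #include <cassert>
    #include <vector>

    using Shape = std::vector<int>;

    // Standalone model of MatmulObj::inferShape: broadcast the batch dims,
    // then apply (m, k) x (k, n) -> (m, n) on the last two dims.
    Shape matmulShape(Shape a, Shape b, bool transA, bool transB) {
        assert(a.size() == b.size() && a.size() >= 2);
        int rank = a.size();
        if (transA) std::swap(a[rank - 2], a[rank - 1]);
        if (transB) std::swap(b[rank - 2], b[rank - 1]);
        Shape out(rank);
        for (int i = 0; i < rank - 2; ++i) {
            assert(a[i] == b[i] || a[i] == 1 || b[i] == 1); // broadcastable
            out[i] = std::max(a[i], b[i]);
        }
        assert(a[rank - 1] == b[rank - 2]); // inner dimensions must agree
        out[rank - 2] = a[rank - 2];
        out[rank - 1] = b[rank - 1];
        return out;
    }

    int main() {
        // (1, 2, 3, 4) x (5, 2, 4, 6) -> (5, 2, 3, 6)
        assert(matmulShape({1, 2, 3, 4}, {5, 2, 4, 6}, false, false) == Shape({5, 2, 3, 6}));
        // transB swaps B's last two dims: (2, 3, 4) x (2, 6, 4)^T -> (2, 3, 6)
        assert(matmulShape({2, 3, 4}, {2, 6, 4}, false, true) == Shape({2, 3, 6}));
        return 0;
    }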
diff --git a/src/operators/transpose.cc b/src/operators/transpose.cc
index faab2b6..a8a98e8 100644
--- a/src/operators/transpose.cc
+++ b/src/operators/transpose.cc
@@ -33,8 +33,11 @@ namespace infini
         // TODO: set output_dim to the correct shape after the transpose
         // REF: https://onnx.ai/onnx/operators/onnx__Transpose.html#transpose-21
         // =================================== Homework ===================================
+        // output axis i takes the size of input axis transposePermute[i]
+        for (int i = 0; i < rank; ++i) {
+            output_dim[i] = input_dim[transposePermute[i]];
+        }

-        return std::nullopt;
+        return std::vector<Shape>{output_dim};
     }

     std::string TransposeObj::toString() const
diff --git a/src/operators/unary.cc b/src/operators/unary.cc
index 3daad36..e446597 100644
--- a/src/operators/unary.cc
+++ b/src/operators/unary.cc
@@ -39,7 +39,13 @@ namespace infini
         // TODO: return the shape after the clip operation
         // REF: https://onnx.ai/onnx/operators/onnx__Clip.html#clip-13
         // =================================== Homework ===================================
-        return std::nullopt;
+        auto A = inputs[0];
+
+        // clip is elementwise, so the output shape equals the input shape
+        Shape inputShape = A->getDims();
+
+        // return a vector holding this single output shape
+        return std::vector<Shape>{inputShape};
     }

     std::string ClipObj::toString() const
@@ -66,7 +72,90 @@ namespace infini
         // REF_FILE: src/core/operator.cc
         // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21
         // =================================== Homework ===================================
-        return {};
+        DataType data_type;
+        switch (castType)
+        {
+        case CastType::Float2Float16:
+            data_type = DataType::Float16;
+            break;
+        case CastType::Float2Int64:
+            data_type = DataType::Int64;
+            break;
+        case CastType::Float2Int32:
+            data_type = DataType::Int32;
+            break;
+        case CastType::Float2Int16:
+            data_type = DataType::Int16;
+            break;
+        case CastType::Float2Int8:
+            data_type = DataType::Int8;
+            break;
+        case CastType::Float2BFloat16:
+            data_type = DataType::BFloat16;
+            break;
+        case CastType::Int322Float:
+            data_type = DataType::Float32;
+            break;
+        case CastType::Int322Int8:
+            data_type = DataType::Int8;
+            break;
+        case CastType::Int322Int16:
+            data_type = DataType::Int16;
+            break;
+        case CastType::Int322Int64:
+            data_type = DataType::Int64;
+            break;
+        case CastType::Int162Float:
+            data_type = DataType::Float32;
+            break;
+        case CastType::Int162Int32:
+            data_type = DataType::Int32;
+            break;
+        case CastType::Int82Float:
+            data_type = DataType::Float32;
+            break;
+        case CastType::Int82Int16:
+            data_type = DataType::Int16;
+            break;
+        case CastType::Int82Int32:
+            data_type = DataType::Int32;
+            break;
+        case CastType::Uint82Float:
+            data_type = DataType::Float32;
+            break;
+        case CastType::Uint82Int32:
+            data_type = DataType::Int32;
+            break;
+        case CastType::Uint82Int64:
+            data_type = DataType::Int64;
+            break;
+        case CastType::Int642Int32:
+            data_type = DataType::Int32;
+            break;
+        case CastType::Int642Uint32:
+            data_type = DataType::UInt32;
+            break;
+        case CastType::Int642Float:
+            data_type = DataType::Float32;
+            break;
+        case CastType::Uint322Int64:
+            data_type = DataType::Int64;
+            break;
+        case CastType::Float162Float:
+            data_type = DataType::Float32;
+            break;
+        case CastType::BFloat162Float:
+            data_type = DataType::Float32;
+            break;
+        case CastType::Float2Float:
+            data_type = DataType::Float32;
+            break;
+        default:
+            data_type = DataType::Undefine;
+            break;
+        }
+
+        return {data_type};
     }

     optional<vector<Shape>> CastObj::inferShape(const TensorVec &inputs)
@@ -75,7 +164,9 @@ namespace infini
         // TODO: return the shape after the cast operation
         // REF: https://onnx.ai/onnx/operators/onnx__Cast.html#cast-21
         // =================================== Homework ===================================
-        return std::nullopt;
+        // cast only changes the dtype; the shape passes through unchanged
+        auto A = inputs[0];
+        Shape outputshape = A->getDims();
+        return std::vector<Shape>{outputshape};
     }

     std::string CastObj::toString() const
diff --git a/src/utils/operator_utils.cc b/src/utils/operator_utils.cc
index edbd2c8..5bbb16b 100644
--- a/src/utils/operator_utils.cc
+++ b/src/utils/operator_utils.cc
@@ -9,8 +9,36 @@ Shape infer_broadcast(const Shape &A, const Shape &B) {
     // TODO: bidirectionally broadcast A and B and return the broadcast shape
     // REF: https://github.com/onnx/onnx/blob/main/docs/Broadcasting.md
     // =================================== Homework ===================================
+    int rankA = A.size();
+    int rankB = B.size();
+    int maxrank = std::max(rankA, rankB);
+    vector<int> alignedA, alignedB;
+
+    // right-align both shapes, padding the missing leading dimensions with 1
+    for (int i = 0; i < maxrank; i++) {
+        alignedA.push_back(i < maxrank - rankA ? 1 : A[i - (maxrank - rankA)]);
+        alignedB.push_back(i < maxrank - rankB ? 1 : B[i - (maxrank - rankB)]);
+    }
+
+    // per axis, the sizes must match or one of them must be 1
+    Shape result(maxrank);
+    for (int i = 0; i < maxrank; i++) {
+        IT_ASSERT(alignedA[i] == alignedB[i] || alignedA[i] == 1 || alignedB[i] == 1,
+                  "Dimension does not meet broadcast requirements");
+        result[i] = std::max(alignedA[i], alignedB[i]);
+    }
+
+    return result;
 }
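infer_broadcast right-aligns the two shapes, pads missing leading dimensions with 1, and takes the per-axis maximum after the compatibility check. A standalone model with a few ONNX-style cases; the `broadcast` function is illustrative, not repo code.

    #include <algorithm>
    #include <cassert>
    #include <vector>

    using Shape = std::vector<int>;

    // Standalone model of infer_broadcast: right-align the two shapes,
    // pad missing leading dims with 1, then take the per-axis maximum.
    Shape broadcast(const Shape &A, const Shape &B) {
        int rankA = A.size(), rankB = B.size();
        int maxrank = std::max(rankA, rankB);
        Shape out(maxrank);
        for (int i = 0; i < maxrank; ++i) {
            int a = i < maxrank - rankA ? 1 : A[i - (maxrank - rankA)];
            int b = i < maxrank - rankB ? 1 : B[i - (maxrank - rankB)];
            assert(a == b || a == 1 || b == 1); // ONNX multidirectional broadcasting rule
            out[i] = std::max(a, b);
        }
        return out;
    }

    int main() {
        assert(broadcast({2, 3, 4, 5}, {}) == Shape({2, 3, 4, 5}));     // a scalar broadcasts to anything
        assert(broadcast({2, 3, 4, 5}, {5}) == Shape({2, 3, 4, 5}));    // trailing dim matches
        assert(broadcast({4, 5}, {2, 3, 4, 5}) == Shape({2, 3, 4, 5})); // missing leading dims become 1
        assert(broadcast({1, 4, 5}, {2, 1, 1}) == Shape({2, 4, 5}));    // 1s stretch on both sides
        return 0;
    }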
diff --git a/test/core/test_graph.cc b/test/core/test_graph.cc
--- a/test/core/test_graph.cc
+++ b/test/core/test_graph.cc
@@ ... @@
     g->print();
     g->optimize();
     // after optimization
-    g->print();
+    // g->print();
+    EXPECT_EQ(g->getOperators().size(), 1);
     EXPECT_EQ(g->getTensors().size(), 3);
     EXPECT_EQ(g->getOperators()[0]->getOpType().underlying(), 7);
diff --git a/test/kernels/nativecpu/test_nativecpu_concat.cc b/test/kernels/nativecpu/test_nativecpu_concat.cc
index fc87fb1..472b269 100644
--- a/test/kernels/nativecpu/test_nativecpu_concat.cc
+++ b/test/kernels/nativecpu/test_nativecpu_concat.cc
@@ -15,14 +15,27 @@ TEST(Concat, NativeCpu) {
     auto t3 = g->addTensor({2, 2, 2, 1}, DataType::Float32);
     auto op = g->addOp<ConcatObj>(TensorVec{t1, t2, t3}, nullptr, 2);
     g->dataMalloc();
+    std::cout << "Tensor sizes: "
+              << "t1=" << t1->getBytes() << " bytes, "
+              << "t2=" << t2->getBytes() << " bytes, "
+              << "t3=" << t3->getBytes() << " bytes" << std::endl;
     t1->setData(IncrementalGenerator());
     t2->setData(OneGenerator());
     t3->setData(OneGenerator());
+    // debug: inspect the input data
+    std::cout << "Input data check:" << std::endl;
+    t1->printData();
+    t2->printData();
+    t3->printData();

     runtime->run(g);
+    std::cout << "Output data:" << std::endl;
+    op->getOutput()->printData();
+    std::cout << "testing" << std::endl;
     EXPECT_TRUE(op->getOutput()->equalData(
         vector<float>{0, 1, 2, 1, 1, 1, 3, 4, 5, 1, 1, 1,
                       6, 7, 8, 1, 1, 1, 9, 10, 11, 1, 1, 1}));
+    std::cout << "tested" << std::endl;
 }

 } // namespace infini