diff --git a/cudaq/include/cudaq/Target/CompileTarget.h b/cudaq/include/cudaq/Target/CompileTarget.h index 95c1a9d9fdc..ffd1371f394 100644 --- a/cudaq/include/cudaq/Target/CompileTarget.h +++ b/cudaq/include/cudaq/Target/CompileTarget.h @@ -100,11 +100,6 @@ class CompileTarget { /// Whether to fully specialize the kernel. bool fullySpecialize = true; - /// Whether this target is a local simulator (not remote, not emulated). On - /// this path `i1` vector arguments are packed as bit-packed - /// `std::vector`. - bool isLocalSimulator = false; - /// Set the `changeSemantics` flag for the argument synthesis pass. bool argumentSynthChangeSemantics = true; diff --git a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp index 0175111c607..cd7edd0877c 100644 --- a/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp +++ b/python/runtime/cudaq/platform/py_alt_launch_kernel.cpp @@ -1010,7 +1010,21 @@ cudaq::OpaqueArguments cudaq::marshal_arguments_for_module_launch( unsigned pos) { return linkResolvedCallable(mod, kernelFunc, pos, pyArg); }; - if (isLocalSimulator) + // Two encodings, one per execution mode (see PackingStyle): + // - Direct launch (argsCreator): the kernel keeps live argument uses that + // are supplied at runtime through the generated `.argsCreator`/thunk, + // whose "C++ side magic" understands a host `std::vector` for an + // `i1` vector. Used only for local simulators with un-synthesized args. + // - Argument synthesis (the default): the arguments are folded into the + // kernel as constants by `ArgumentConverter`, which reads every vector as + // the universal `{begin, end, capacity}` triple and therefore must be + // given the triple-compatible `std::vector` for an `i1` vector + // (never the bit-packed `std::vector` specialization). + // A kernel whose formal arguments are all unused is synthesized + // (`isFullySynthesized`); otherwise a local simulator direct-launches it. + const bool directLaunch = + isLocalSimulator && !cudaq::opt::factory::isFullySynthesized(kernelFunc); + if (directLaunch) cudaq::packArgs(args, runtimeArgs, kernelFunc, handler); else diff --git a/runtime/cudaq/platform/quantum_platform.cpp b/runtime/cudaq/platform/quantum_platform.cpp index 45ceba25a31..bd04812a2bc 100644 --- a/runtime/cudaq/platform/quantum_platform.cpp +++ b/runtime/cudaq/platform/quantum_platform.cpp @@ -114,7 +114,6 @@ getDefaultPythonCompileTargetImpl() { bool isLocalSimulator = !(platform->is_remote() || platform->is_emulated()); ct->fullySpecialize = !isLocalSimulator; - ct->isLocalSimulator = isLocalSimulator; ct->supportDeviceCalls = true; ct->argumentSynthChangeSemantics = false; ct->pipelineConfig.codegenTranslation = "qir:"; diff --git a/runtime/internal/compiler/ArgumentConversion.cpp b/runtime/internal/compiler/ArgumentConversion.cpp index 32ec4e72864..dcb0f09efab 100644 --- a/runtime/internal/compiler/ArgumentConversion.cpp +++ b/runtime/internal/compiler/ArgumentConversion.cpp @@ -97,15 +97,15 @@ static Value genConstant(OpBuilder &builder, const std::string &v, // Forward declare aggregate type builder as they can be recursive. static Value genRecursiveSpan(OpBuilder &, cudaq::cc::StdvecType, void *, - ModuleOp, llvm::DataLayout &, bool); + ModuleOp, llvm::DataLayout &); static Value genConstant(OpBuilder &, cudaq::cc::StdvecType, void *, ModuleOp, - llvm::DataLayout &, bool); + llvm::DataLayout &); static Value genConstant(OpBuilder &, cudaq::cc::StructType, void *, ModuleOp, - llvm::DataLayout &, bool); + llvm::DataLayout &); static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, ModuleOp, - llvm::DataLayout &, bool); + llvm::DataLayout &); static Value genConstant(OpBuilder &, cudaq::cc::CallableType, void *, ModuleOp, - llvm::DataLayout &, bool); + llvm::DataLayout &); /// Create callee.init_N that initializes the state /// @@ -524,7 +524,7 @@ static bool isSupportedRecursiveSpan(cudaq::cc::StdvecType ty) { // Recursive step processing of aggregates. Value dispatchSubtype(OpBuilder &builder, Type ty, void *p, ModuleOp substMod, - llvm::DataLayout &layout, bool boolVecBitPacked = false) { + llvm::DataLayout &layout) { auto *ctx = builder.getContext(); return TypeSwitch(ty) .Case([&](IntegerType intTy) -> Value { @@ -567,16 +567,16 @@ Value dispatchSubtype(OpBuilder &builder, Type ty, void *p, ModuleOp substMod, substMod); }) .Case([&](cudaq::cc::StdvecType ty) { - return genConstant(builder, ty, p, substMod, layout, boolVecBitPacked); + return genConstant(builder, ty, p, substMod, layout); }) .Case([&](cudaq::cc::StructType ty) { - return genConstant(builder, ty, p, substMod, layout, boolVecBitPacked); + return genConstant(builder, ty, p, substMod, layout); }) .Case([&](cudaq::cc::ArrayType ty) { - return genConstant(builder, ty, p, substMod, layout, boolVecBitPacked); + return genConstant(builder, ty, p, substMod, layout); }) .Case([&](cudaq::cc::CallableType ty) { - return genConstant(builder, ty, p, substMod, layout, boolVecBitPacked); + return genConstant(builder, ty, p, substMod, layout); }) .Default({}); } @@ -597,40 +597,21 @@ static std::size_t getHostSideElementSize(Type eleTy, } /// Recursively builds an `ArrayAttr` containing the constants. -/// -/// Set \p boolVecBitPacked when an `i1` vector arg is a host -/// `std::vector` (bit-packed; not the `{begin, end, capacity}` triple). ArrayAttr genRecursiveConstantArray(OpBuilder &builder, cudaq::cc::StdvecType vecTy, void *p, - llvm::DataLayout &layout, - bool boolVecBitPacked = false) { - auto eleTy = vecTy.getElementType(); - - // Bit-packed `std::vector`: read via the container API, not a triple. - if (boolVecBitPacked && eleTy.isInteger(1)) { - auto *boolVec = reinterpret_cast *>(p); - if (boolVec->empty()) - return {}; - auto intTy = cast(eleTy); - SmallVector members; - members.reserve(boolVec->size()); - for (bool bit : *boolVec) - members.push_back(IntegerAttr::get(intTy, bit ? 1 : 0)); - return ArrayAttr::get(builder.getContext(), members); - } - + llvm::DataLayout &layout) { typedef const char *VectorType[3]; VectorType *vecPtr = static_cast(p); auto delta = (*vecPtr)[1] - (*vecPtr)[0]; if (!delta) return {}; + auto eleTy = vecTy.getElementType(); unsigned stepBy = 0; std::function genAttr; if (auto innerTy = dyn_cast(eleTy)) { stepBy = sizeof(VectorType); genAttr = [&, innerTy](char *p) -> Attribute { - return genRecursiveConstantArray(builder, innerTy, p, layout, - boolVecBitPacked); + return genRecursiveConstantArray(builder, innerTy, p, layout); }; } else if (auto stringTy = dyn_cast(eleTy)) { stepBy = sizeof(std::string); @@ -707,10 +688,8 @@ static Type convertRecursiveSpanType(Type ty) { /// constant propagation through the recursive span structure. The reify /// operation will be lowered to more primitive ops on an as-needed basis. Value genRecursiveSpan(OpBuilder &builder, cudaq::cc::StdvecType ty, void *p, - ModuleOp substMod, llvm::DataLayout &layout, - bool boolVecBitPacked = false) { - ArrayAttr constants = - genRecursiveConstantArray(builder, ty, p, layout, boolVecBitPacked); + ModuleOp substMod, llvm::DataLayout &layout) { + ArrayAttr constants = genRecursiveConstantArray(builder, ty, p, layout); auto loc = builder.getUnknownLoc(); if (!constants) { // Empty vector. Not much to contemplate here. @@ -726,11 +705,9 @@ Value genRecursiveSpan(OpBuilder &builder, cudaq::cc::StdvecType ty, void *p, } Value genConstant(OpBuilder &builder, cudaq::cc::StdvecType vecTy, void *p, - ModuleOp substMod, llvm::DataLayout &layout, - bool boolVecBitPacked = false) { + ModuleOp substMod, llvm::DataLayout &layout) { if (isSupportedRecursiveSpan(vecTy)) - return genRecursiveSpan(builder, vecTy, p, substMod, layout, - boolVecBitPacked); + return genRecursiveSpan(builder, vecTy, p, substMod, layout); typedef const char *VectorType[3]; VectorType *vecPtr = static_cast(p); auto delta = (*vecPtr)[1] - (*vecPtr)[0]; @@ -750,7 +727,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::StdvecType vecTy, void *p, for (std::int32_t i = 0; i < vecSize; ++i) { if (Value val = dispatchSubtype( builder, eleTy, static_cast(const_cast(cursor)), - substMod, layout, boolVecBitPacked)) { + substMod, layout)) { auto atLoc = cudaq::cc::ComputePtrOp::create( builder, loc, elePtrTy, buffer, ArrayRef{i}); @@ -763,8 +740,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::StdvecType vecTy, void *p, } Value genConstant(OpBuilder &builder, cudaq::cc::StructType strTy, void *p, - ModuleOp substMod, llvm::DataLayout &layout, - bool boolVecBitPacked = false) { + ModuleOp substMod, llvm::DataLayout &layout) { if (strTy.getMembers().empty()) return {}; const char *cursor = static_cast(p); @@ -776,7 +752,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::StructType strTy, void *p, builder, iter.value(), static_cast(const_cast( cursor + cudaq::opt::getDataOffset(layout, strTy, i))), - substMod, layout, boolVecBitPacked)) + substMod, layout)) aggie = cudaq::cc::InsertValueOp::create(builder, loc, strTy, aggie, v, i); } @@ -784,8 +760,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::StructType strTy, void *p, } Value genConstant(OpBuilder &builder, cudaq::cc::CallableType callTy, void *p, - ModuleOp substMod, llvm::DataLayout &layout, - bool boolVecBitPacked = false) { + ModuleOp substMod, llvm::DataLayout &layout) { if (!p) return {}; auto loc = builder.getUnknownLoc(); @@ -812,7 +787,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::CallableType callTy, void *p, if (hasLiftedArgs) { for (unsigned i = liftedPos, j = 0; i < liftedArity; ++i, ++j) { Value v = dispatchSubtype(builder, calleeInpTys[i], closureArgs[j], - substMod, layout, boolVecBitPacked); + substMod, layout); assert(v && "lifted argument must be handled"); args.push_back(v); } @@ -831,8 +806,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::CallableType callTy, void *p, } Value genConstant(OpBuilder &builder, cudaq::cc::ArrayType arrTy, void *p, - ModuleOp substMod, llvm::DataLayout &layout, - bool boolVecBitPacked = false) { + ModuleOp substMod, llvm::DataLayout &layout) { if (arrTy.isUnknownSize()) return {}; auto eleTy = arrTy.getElementType(); @@ -844,7 +818,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::ArrayType arrTy, void *p, for (std::size_t i = 0; i < arrSize; ++i) { if (Value v = dispatchSubtype( builder, eleTy, static_cast(const_cast(cursor)), - substMod, layout, boolVecBitPacked)) + substMod, layout)) aggie = cudaq::cc::InsertValueOp::create(builder, loc, arrTy, aggie, v, i); cursor += eleSize; @@ -880,9 +854,8 @@ Value genConstant(OpBuilder &builder, cudaq::cc::IndirectCallableType indCallTy, //===----------------------------------------------------------------------===// cudaq_internal::compiler::ArgumentConverter::ArgumentConverter( - StringRef kernelName, ModuleOp sourceModule, bool boolVecBitPacked) - : sourceModule(sourceModule), kernelName(kernelName), - boolVecBitPacked(boolVecBitPacked) {} + StringRef kernelName, ModuleOp sourceModule) + : sourceModule(sourceModule), kernelName(kernelName) {} void cudaq_internal::compiler::ArgumentConverter::gen( std::span arguments) { @@ -981,23 +954,19 @@ void cudaq_internal::compiler::ArgumentConverter::gen( return {}; }) .Case([&](cudaq::cc::StdvecType ty) { - return buildSubst(ty, argPtr, substModule, dataLayout, - boolVecBitPacked); + return buildSubst(ty, argPtr, substModule, dataLayout); }) .Case([&](cudaq::cc::StructType ty) { - return buildSubst(ty, argPtr, substModule, dataLayout, - boolVecBitPacked); + return buildSubst(ty, argPtr, substModule, dataLayout); }) .Case([&](cudaq::cc::ArrayType ty) { - return buildSubst(ty, argPtr, substModule, dataLayout, - boolVecBitPacked); + return buildSubst(ty, argPtr, substModule, dataLayout); }) .Case([&](cudaq::cc::IndirectCallableType ty) { return buildSubst(ty, argPtr, substModule, dataLayout); }) .Case([&](cudaq::cc::CallableType ty) { - return buildSubst(ty, argPtr, substModule, dataLayout, - boolVecBitPacked); + return buildSubst(ty, argPtr, substModule, dataLayout); }) .Default({}); if (subst) diff --git a/runtime/internal/compiler/Compiler.cpp b/runtime/internal/compiler/Compiler.cpp index 7bf586081ce..15959742790 100644 --- a/runtime/internal/compiler/Compiler.cpp +++ b/runtime/internal/compiler/Compiler.cpp @@ -224,11 +224,7 @@ cudaq_internal::compiler::Compiler::prepareModule(const std::string &kernelName, // For quantum devices, we generate a collection of `init` and // `num_qubits` functions and their substitutions created // from a kernel and arguments that generated a state argument. - // Local simulators marshal `i1` vectors as bit-packed `std::vector` - // (argsCreator); remote/emulated targets use `std::vector`. - const bool boolVecBitPacked = target->isLocalSimulator; - cudaq_internal::compiler::ArgumentConverter argCon(kernelName, moduleOp, - boolVecBitPacked); + cudaq_internal::compiler::ArgumentConverter argCon(kernelName, moduleOp); // Must stay in scope as `eraseNonCallableArguments` may populate it std::vector closureArgs; if (cudaq::opt::factory::isFullySynthesized(epFunc)) { diff --git a/runtime/internal/compiler/include/cudaq_internal/compiler/ArgumentConversion.h b/runtime/internal/compiler/include/cudaq_internal/compiler/ArgumentConversion.h index 20148e89fe9..8abf11f7096 100644 --- a/runtime/internal/compiler/include/cudaq_internal/compiler/ArgumentConversion.h +++ b/runtime/internal/compiler/include/cudaq_internal/compiler/ArgumentConversion.h @@ -51,12 +51,7 @@ class ArgumentConverter { public: /// Build an instance to create argument substitutions for a specified \p /// kernelName in \p sourceModule. - /// - /// Set \p boolVecBitPacked when `i1` vector arguments are host - /// `std::vector` (local-simulator launch path), not - /// `std::vector`. - ArgumentConverter(mlir::StringRef kernelName, mlir::ModuleOp sourceModule, - bool boolVecBitPacked = false); + ArgumentConverter(mlir::StringRef kernelName, mlir::ModuleOp sourceModule); ~ArgumentConverter() { for (auto *kInfo : kernelSubstitutions) { @@ -115,10 +110,6 @@ class ArgumentConverter { /// Kernel we are substituting the arguments for. mlir::StringRef kernelName; - - /// Whether `i1` vector arguments are bit-packed `std::vector` (vs - /// `std::vector`). See the constructor. - bool boolVecBitPacked; }; /// Merge modules from any CallableClosureArgument arguments into \p intoModule. diff --git a/runtime/test/test_argument_conversion.cpp b/runtime/test/test_argument_conversion.cpp index 863a0d710bc..82f3e3f814a 100644 --- a/runtime/test/test_argument_conversion.cpp +++ b/runtime/test/test_argument_conversion.cpp @@ -152,8 +152,7 @@ void dumpSubstitutionModules(ArgumentConverter &con) { void doSimpleTest(mlir::MLIRContext *ctx, const std::string &typeName, std::vector args, - const std::string &additionalCode = "", - bool boolVecBitPacked = false) { + const std::string &additionalCode = "") { std::string code = additionalCode + R"#( func.func private @callee(%0: )#" + typeName + R"#() @@ -167,7 +166,7 @@ func.func @__nvqpp__mlirgen__testy(%0: )#" + // Create the Module auto mod = mlir::parseSourceString(code, ctx); llvm::outs() << "Source module:\n" << *mod << '\n'; - ArgumentConverter ab{"testy", *mod, boolVecBitPacked}; + ArgumentConverter ab{"testy", *mod}; // Create the argument conversions ab.gen(args); // Dump all conversions @@ -401,22 +400,6 @@ void test_vectors(mlir::MLIRContext *ctx) { // CHECK: } // clang-format on - { - // Real bit-packed `std::vector`, as the local-simulator launch path - // passes it. Reading this as a `{begin, end, capacity}` triple corrupts the - // heap; `boolVecBitPacked` selects the correct reader. - std::vector x = {true, false, true, true}; - std::vector v = {static_cast(&x)}; - doSimpleTest(ctx, "!cc.stdvec", v, /*additionalCode=*/"", - /*boolVecBitPacked=*/true); - } - // clang-format off -// CHECK-LABEL: cc.arg_subst[0] { -// CHECK: %[[VAL_0:.*]] = cc.const_array [true, false, true, true] : !cc.array -// CHECK: %[[VAL_1:.*]] = cc.reify_span %[[VAL_0]] : (!cc.array) -> !cc.stdvec - // CHECK: } - // clang-format on - { std::vector> x = { {cudaq::pauli_word{"XX"}, cudaq::pauli_word{"XY"}},