Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 0 additions & 5 deletions cudaq/include/cudaq/Target/CompileTarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,6 @@ class CompileTarget {
/// Whether to fully specialize the kernel.
bool fullySpecialize = true;

/// Whether this target is a local simulator (not remote, not emulated). On
/// this path `i1` vector arguments are packed as bit-packed
/// `std::vector<bool>`.
bool isLocalSimulator = false;

/// Set the `changeSemantics` flag for the argument synthesis pass.
bool argumentSynthChangeSemantics = true;

Expand Down
16 changes: 15 additions & 1 deletion python/runtime/cudaq/platform/py_alt_launch_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1010,7 +1010,21 @@ cudaq::OpaqueArguments cudaq::marshal_arguments_for_module_launch(
unsigned pos) {
return linkResolvedCallable(mod, kernelFunc, pos, pyArg);
};
if (isLocalSimulator)
// Two encodings, one per execution mode (see PackingStyle):
// - Direct launch (argsCreator): the kernel keeps live argument uses that
// are supplied at runtime through the generated `.argsCreator`/thunk,
// whose marshaling code accepts a host `std::vector<bool>` for an `i1`
// vector. Used only for local simulators with un-synthesized args.
// - Argument synthesis (the default): the arguments are folded into the
// kernel as constants by `ArgumentConverter`, which reads every vector as
// the universal `{begin, end, capacity}` triple and therefore must be
// given the triple-compatible `std::vector<char>` for an `i1` vector
// (never the bit-packed `std::vector<bool>` specialization).
// A kernel whose formal arguments are all unused is synthesized
// (`isFullySynthesized`); otherwise a local simulator direct-launches it.
const bool directLaunch =
isLocalSimulator && !cudaq::opt::factory::isFullySynthesized(kernelFunc);
if (directLaunch)
cudaq::packArgs<cudaq::PackingStyle::argsCreator>(args, runtimeArgs,
kernelFunc, handler);
else
Expand Down
1 change: 0 additions & 1 deletion runtime/cudaq/platform/quantum_platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,6 @@ getDefaultPythonCompileTargetImpl() {
bool isLocalSimulator = !(platform->is_remote() || platform->is_emulated());

ct->fullySpecialize = !isLocalSimulator;
ct->isLocalSimulator = isLocalSimulator;
ct->supportDeviceCalls = true;
ct->argumentSynthChangeSemantics = false;
ct->pipelineConfig.codegenTranslation = "qir:";
Expand Down
91 changes: 30 additions & 61 deletions runtime/internal/compiler/ArgumentConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,15 @@ static Value genConstant(OpBuilder &builder, const std::string &v,

// Forward declare aggregate type builder as they can be recursive.
static Value genRecursiveSpan(OpBuilder &, cudaq::cc::StdvecType, void *,
ModuleOp, llvm::DataLayout &, bool);
ModuleOp, llvm::DataLayout &);
static Value genConstant(OpBuilder &, cudaq::cc::StdvecType, void *, ModuleOp,
llvm::DataLayout &, bool);
llvm::DataLayout &);
static Value genConstant(OpBuilder &, cudaq::cc::StructType, void *, ModuleOp,
llvm::DataLayout &, bool);
llvm::DataLayout &);
static Value genConstant(OpBuilder &, cudaq::cc::ArrayType, void *, ModuleOp,
llvm::DataLayout &, bool);
llvm::DataLayout &);
static Value genConstant(OpBuilder &, cudaq::cc::CallableType, void *, ModuleOp,
llvm::DataLayout &, bool);
llvm::DataLayout &);

/// Create callee.init_N that initializes the state
///
Expand Down Expand Up @@ -524,7 +524,7 @@ static bool isSupportedRecursiveSpan(cudaq::cc::StdvecType ty) {

// Recursive step processing of aggregates.
Value dispatchSubtype(OpBuilder &builder, Type ty, void *p, ModuleOp substMod,
llvm::DataLayout &layout, bool boolVecBitPacked = false) {
llvm::DataLayout &layout) {
auto *ctx = builder.getContext();
return TypeSwitch<Type, Value>(ty)
.Case([&](IntegerType intTy) -> Value {
Expand Down Expand Up @@ -567,16 +567,16 @@ Value dispatchSubtype(OpBuilder &builder, Type ty, void *p, ModuleOp substMod,
substMod);
})
.Case([&](cudaq::cc::StdvecType ty) {
return genConstant(builder, ty, p, substMod, layout, boolVecBitPacked);
return genConstant(builder, ty, p, substMod, layout);
})
.Case([&](cudaq::cc::StructType ty) {
return genConstant(builder, ty, p, substMod, layout, boolVecBitPacked);
return genConstant(builder, ty, p, substMod, layout);
})
.Case([&](cudaq::cc::ArrayType ty) {
return genConstant(builder, ty, p, substMod, layout, boolVecBitPacked);
return genConstant(builder, ty, p, substMod, layout);
})
.Case([&](cudaq::cc::CallableType ty) {
return genConstant(builder, ty, p, substMod, layout, boolVecBitPacked);
return genConstant(builder, ty, p, substMod, layout);
})
.Default({});
}
Expand All @@ -597,40 +597,21 @@ static std::size_t getHostSideElementSize(Type eleTy,
}

/// Recursively builds an `ArrayAttr` containing the constants.
///
/// Set \p boolVecBitPacked when an `i1` vector arg is a host
/// `std::vector<bool>` (bit-packed; not the `{begin, end, capacity}` triple).
ArrayAttr genRecursiveConstantArray(OpBuilder &builder,
cudaq::cc::StdvecType vecTy, void *p,
llvm::DataLayout &layout,
bool boolVecBitPacked = false) {
auto eleTy = vecTy.getElementType();

// Bit-packed `std::vector<bool>`: read via the container API, not a triple.
if (boolVecBitPacked && eleTy.isInteger(1)) {
auto *boolVec = reinterpret_cast<const std::vector<bool> *>(p);
if (boolVec->empty())
return {};
auto intTy = cast<IntegerType>(eleTy);
SmallVector<Attribute> members;
members.reserve(boolVec->size());
for (bool bit : *boolVec)
members.push_back(IntegerAttr::get(intTy, bit ? 1 : 0));
return ArrayAttr::get(builder.getContext(), members);
}

llvm::DataLayout &layout) {
typedef const char *VectorType[3];
VectorType *vecPtr = static_cast<VectorType *>(p);
auto delta = (*vecPtr)[1] - (*vecPtr)[0];
if (!delta)
return {};
auto eleTy = vecTy.getElementType();
unsigned stepBy = 0;
std::function<Attribute(char *)> genAttr;
if (auto innerTy = dyn_cast<cudaq::cc::StdvecType>(eleTy)) {
stepBy = sizeof(VectorType);
genAttr = [&, innerTy](char *p) -> Attribute {
return genRecursiveConstantArray(builder, innerTy, p, layout,
boolVecBitPacked);
return genRecursiveConstantArray(builder, innerTy, p, layout);
};
} else if (auto stringTy = dyn_cast<cudaq::cc::CharspanType>(eleTy)) {
stepBy = sizeof(std::string);
Expand Down Expand Up @@ -707,10 +688,8 @@ static Type convertRecursiveSpanType(Type ty) {
/// constant propagation through the recursive span structure. The reify
/// operation will be lowered to more primitive ops on an as-needed basis.
Value genRecursiveSpan(OpBuilder &builder, cudaq::cc::StdvecType ty, void *p,
ModuleOp substMod, llvm::DataLayout &layout,
bool boolVecBitPacked = false) {
ArrayAttr constants =
genRecursiveConstantArray(builder, ty, p, layout, boolVecBitPacked);
ModuleOp substMod, llvm::DataLayout &layout) {
ArrayAttr constants = genRecursiveConstantArray(builder, ty, p, layout);
auto loc = builder.getUnknownLoc();
if (!constants) {
// Empty vector. Not much to contemplate here.
Expand All @@ -726,11 +705,9 @@ Value genRecursiveSpan(OpBuilder &builder, cudaq::cc::StdvecType ty, void *p,
}

Value genConstant(OpBuilder &builder, cudaq::cc::StdvecType vecTy, void *p,
ModuleOp substMod, llvm::DataLayout &layout,
bool boolVecBitPacked = false) {
ModuleOp substMod, llvm::DataLayout &layout) {
if (isSupportedRecursiveSpan(vecTy))
return genRecursiveSpan(builder, vecTy, p, substMod, layout,
boolVecBitPacked);
return genRecursiveSpan(builder, vecTy, p, substMod, layout);
typedef const char *VectorType[3];
VectorType *vecPtr = static_cast<VectorType *>(p);
auto delta = (*vecPtr)[1] - (*vecPtr)[0];
Expand All @@ -750,7 +727,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::StdvecType vecTy, void *p,
for (std::int32_t i = 0; i < vecSize; ++i) {
if (Value val = dispatchSubtype(
builder, eleTy, static_cast<void *>(const_cast<char *>(cursor)),
substMod, layout, boolVecBitPacked)) {
substMod, layout)) {
auto atLoc = cudaq::cc::ComputePtrOp::create(
builder, loc, elePtrTy, buffer,
ArrayRef<cudaq::cc::ComputePtrArg>{i});
Expand All @@ -763,8 +740,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::StdvecType vecTy, void *p,
}

Value genConstant(OpBuilder &builder, cudaq::cc::StructType strTy, void *p,
ModuleOp substMod, llvm::DataLayout &layout,
bool boolVecBitPacked = false) {
ModuleOp substMod, llvm::DataLayout &layout) {
if (strTy.getMembers().empty())
return {};
const char *cursor = static_cast<const char *>(p);
Expand All @@ -776,16 +752,15 @@ Value genConstant(OpBuilder &builder, cudaq::cc::StructType strTy, void *p,
builder, iter.value(),
static_cast<void *>(const_cast<char *>(
cursor + cudaq::opt::getDataOffset(layout, strTy, i))),
substMod, layout, boolVecBitPacked))
substMod, layout))
aggie =
cudaq::cc::InsertValueOp::create(builder, loc, strTy, aggie, v, i);
}
return aggie;
}

Value genConstant(OpBuilder &builder, cudaq::cc::CallableType callTy, void *p,
ModuleOp substMod, llvm::DataLayout &layout,
bool boolVecBitPacked = false) {
ModuleOp substMod, llvm::DataLayout &layout) {
if (!p)
return {};
auto loc = builder.getUnknownLoc();
Expand All @@ -812,7 +787,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::CallableType callTy, void *p,
if (hasLiftedArgs) {
for (unsigned i = liftedPos, j = 0; i < liftedArity; ++i, ++j) {
Value v = dispatchSubtype(builder, calleeInpTys[i], closureArgs[j],
substMod, layout, boolVecBitPacked);
substMod, layout);
assert(v && "lifted argument must be handled");
args.push_back(v);
}
Expand All @@ -831,8 +806,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::CallableType callTy, void *p,
}

Value genConstant(OpBuilder &builder, cudaq::cc::ArrayType arrTy, void *p,
ModuleOp substMod, llvm::DataLayout &layout,
bool boolVecBitPacked = false) {
ModuleOp substMod, llvm::DataLayout &layout) {
if (arrTy.isUnknownSize())
return {};
auto eleTy = arrTy.getElementType();
Expand All @@ -844,7 +818,7 @@ Value genConstant(OpBuilder &builder, cudaq::cc::ArrayType arrTy, void *p,
for (std::size_t i = 0; i < arrSize; ++i) {
if (Value v = dispatchSubtype(
builder, eleTy, static_cast<void *>(const_cast<char *>(cursor)),
substMod, layout, boolVecBitPacked))
substMod, layout))
aggie =
cudaq::cc::InsertValueOp::create(builder, loc, arrTy, aggie, v, i);
cursor += eleSize;
Expand Down Expand Up @@ -880,9 +854,8 @@ Value genConstant(OpBuilder &builder, cudaq::cc::IndirectCallableType indCallTy,
//===----------------------------------------------------------------------===//

cudaq_internal::compiler::ArgumentConverter::ArgumentConverter(
StringRef kernelName, ModuleOp sourceModule, bool boolVecBitPacked)
: sourceModule(sourceModule), kernelName(kernelName),
boolVecBitPacked(boolVecBitPacked) {}
StringRef kernelName, ModuleOp sourceModule)
: sourceModule(sourceModule), kernelName(kernelName) {}

void cudaq_internal::compiler::ArgumentConverter::gen(
std::span<void *const> arguments) {
Expand Down Expand Up @@ -981,23 +954,19 @@ void cudaq_internal::compiler::ArgumentConverter::gen(
return {};
})
.Case([&](cudaq::cc::StdvecType ty) {
return buildSubst(ty, argPtr, substModule, dataLayout,
boolVecBitPacked);
return buildSubst(ty, argPtr, substModule, dataLayout);
})
.Case([&](cudaq::cc::StructType ty) {
return buildSubst(ty, argPtr, substModule, dataLayout,
boolVecBitPacked);
return buildSubst(ty, argPtr, substModule, dataLayout);
})
.Case([&](cudaq::cc::ArrayType ty) {
return buildSubst(ty, argPtr, substModule, dataLayout,
boolVecBitPacked);
return buildSubst(ty, argPtr, substModule, dataLayout);
})
.Case([&](cudaq::cc::IndirectCallableType ty) {
return buildSubst(ty, argPtr, substModule, dataLayout);
})
.Case([&](cudaq::cc::CallableType ty) {
return buildSubst(ty, argPtr, substModule, dataLayout,
boolVecBitPacked);
return buildSubst(ty, argPtr, substModule, dataLayout);
})
.Default({});
if (subst)
Expand Down
6 changes: 1 addition & 5 deletions runtime/internal/compiler/Compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,7 @@ cudaq_internal::compiler::Compiler::prepareModule(const std::string &kernelName,
// For quantum devices, we generate a collection of `init` and
// `num_qubits` functions and their substitutions created
// from a kernel and arguments that generated a state argument.
// Local simulators marshal `i1` vectors as bit-packed `std::vector<bool>`
// (argsCreator); remote/emulated targets use `std::vector<char>`.
const bool boolVecBitPacked = target->isLocalSimulator;
cudaq_internal::compiler::ArgumentConverter argCon(kernelName, moduleOp,
boolVecBitPacked);
cudaq_internal::compiler::ArgumentConverter argCon(kernelName, moduleOp);
// Must stay in scope as `eraseNonCallableArguments` may populate it
std::vector<void *> closureArgs;
if (cudaq::opt::factory::isFullySynthesized(epFunc)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,7 @@ class ArgumentConverter {
public:
/// Build an instance to create argument substitutions for a specified \p
/// kernelName in \p sourceModule.
///
/// Set \p boolVecBitPacked when `i1` vector arguments are host
/// `std::vector<bool>` (local-simulator launch path), not
/// `std::vector<char>`.
ArgumentConverter(mlir::StringRef kernelName, mlir::ModuleOp sourceModule,
bool boolVecBitPacked = false);
ArgumentConverter(mlir::StringRef kernelName, mlir::ModuleOp sourceModule);

~ArgumentConverter() {
for (auto *kInfo : kernelSubstitutions) {
Expand Down Expand Up @@ -115,10 +110,6 @@ class ArgumentConverter {

/// Kernel we are substituting the arguments for.
mlir::StringRef kernelName;

/// Whether `i1` vector arguments are bit-packed `std::vector<bool>` (vs
/// `std::vector<char>`). See the constructor.
bool boolVecBitPacked;
};

/// Merge modules from any CallableClosureArgument arguments into \p intoModule.
Expand Down
21 changes: 2 additions & 19 deletions runtime/test/test_argument_conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,7 @@ void dumpSubstitutionModules(ArgumentConverter &con) {

void doSimpleTest(mlir::MLIRContext *ctx, const std::string &typeName,
std::vector<void *> args,
const std::string &additionalCode = "",
bool boolVecBitPacked = false) {
const std::string &additionalCode = "") {
std::string code = additionalCode + R"#(
func.func private @callee(%0: )#" +
typeName + R"#()
Expand All @@ -167,7 +166,7 @@ func.func @__nvqpp__mlirgen__testy(%0: )#" +
// Create the Module
auto mod = mlir::parseSourceString<mlir::ModuleOp>(code, ctx);
llvm::outs() << "Source module:\n" << *mod << '\n';
ArgumentConverter ab{"testy", *mod, boolVecBitPacked};
ArgumentConverter ab{"testy", *mod};
// Create the argument conversions
ab.gen(args);
// Dump all conversions
Expand Down Expand Up @@ -401,22 +400,6 @@ void test_vectors(mlir::MLIRContext *ctx) {
// CHECK: }
// clang-format on

{
// Real bit-packed `std::vector<bool>`, as the local-simulator launch path
// passes it. Reading this as a `{begin, end, capacity}` triple corrupts the
// heap; `boolVecBitPacked` selects the correct reader.
std::vector<bool> x = {true, false, true, true};
std::vector<void *> v = {static_cast<void *>(&x)};
doSimpleTest(ctx, "!cc.stdvec<i1>", v, /*additionalCode=*/"",
/*boolVecBitPacked=*/true);
}
// clang-format off
// CHECK-LABEL: cc.arg_subst[0] {
// CHECK: %[[VAL_0:.*]] = cc.const_array [true, false, true, true] : !cc.array<i1 x ?>
// CHECK: %[[VAL_1:.*]] = cc.reify_span %[[VAL_0]] : (!cc.array<i1 x ?>) -> !cc.stdvec<i1>
// CHECK: }
// clang-format on

{
std::vector<std::vector<cudaq::pauli_word>> x = {
{cudaq::pauli_word{"XX"}, cudaq::pauli_word{"XY"}},
Expand Down
Loading