Skip to content

Commit ff9f528

Browse files
[CIR] Added support for psrldqi
1 parent 31700c8 commit ff9f528

File tree

1 file changed

+43
-1
lines changed

1 file changed

+43
-1
lines changed

clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,48 @@ static mlir::Value emitX86SExtMask(CIRGenFunction &cgf, mlir::Value op,
158158
return cgf.getBuilder().createCast(loc, cir::CastKind::integral, mask, dstTy);
159159
}
160160

161+
/// Lowers the x86 `psrldqi*_byteshift` builtins (a byte-granular logical right
/// shift applied independently to each 128-bit lane) to a byte-vector shuffle
/// against an all-zero vector.
///
/// \param cgf  Function-level CIR codegen state; supplies the builder and the
///             source location.
/// \param E    The builtin call expression; only its location is used.
/// \param Ops  Already-emitted call operands: Ops[0] is the vector to shift,
///             Ops[1] is the constant shift amount (only its low 8 bits are
///             used).
/// \returns The shifted value, with the same type as Ops[0].
static mlir::Value emitX86PSRLDQIByteShift(CIRGenFunction &cgf,
                                           const CallExpr *E,
                                           ArrayRef<mlir::Value> Ops) {
  auto &bld = cgf.getBuilder();
  auto vecTy = cast<cir::VectorType>(Ops[0].getType());
  auto loc = cgf.getLoc(E->getExprLoc());
  unsigned byteShift = getIntValueFromConstOp(Ops[1]) & 0xff;

  // Shifting by 16 or more bytes moves every byte out of its lane, so the
  // whole result is zero.
  if (byteShift >= 16)
    return bld.getZero(loc, vecTy);

  // NOTE(review): the factor of 8 presumably converts a count of 64-bit
  // elements into a count of bytes -- confirm against the vector type the
  // builtin is declared with.
  auto byteCount = vecTy.getSize() * 8;
  assert(byteCount % 16 == 0 && "Expected a multiple of 16");

  // Same scheme as the original (non-CIR) CodeGen: 256/512-bit psrldq works on
  // 128-bit lanes, so the shuffle mask is built one 16-byte lane at a time.
  llvm::SmallVector<int64_t, 64> shuffleMask;
  for (unsigned laneBase = 0; laneBase < byteCount; laneBase += 16) {
    for (unsigned byteInLane = 0; byteInLane < 16; ++byteInLane) {
      unsigned src = byteInLane + byteShift;
      // A source position past the end of the lane is pushed up by
      // (byteCount - 16); presumably that makes the index address the second
      // (all-zero) shuffle operand -- TODO confirm createVecShuffle indexing.
      if (src >= 16)
        src += byteCount - 16;
      shuffleMask.push_back(src + laneBase);
    }
  }

  // View the input as signed 8-bit elements and pair it with a zero vector of
  // the same byte type.
  auto i8VecTy = cir::VectorType::get(bld.getSInt8Ty(), byteCount);
  mlir::Value asBytes = bld.createBitcast(Ops[0], i8VecTy);
  mlir::Value zeroBytes = bld.getZero(loc, i8VecTy);

  // Shuffle the bytes, then reinterpret the result as the original type.
  mlir::Value shifted =
      bld.createVecShuffle(loc, asBytes, zeroBytes, shuffleMask);
  return bld.createBitcast(shifted, vecTy);
}
201+
202+
161203
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
162204
const CallExpr *E) {
163205
if (BuiltinID == Builtin::BI__builtin_cpu_is)
@@ -1112,7 +1154,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
11121154
case X86::BI__builtin_ia32_psrldqi128_byteshift:
11131155
case X86::BI__builtin_ia32_psrldqi256_byteshift:
11141156
case X86::BI__builtin_ia32_psrldqi512_byteshift:
1115-
llvm_unreachable("psrldqi NYI");
1157+
emitX86PSRLDQIByteShift(*this, E, Ops);
11161158
case X86::BI__builtin_ia32_kshiftliqi:
11171159
case X86::BI__builtin_ia32_kshiftlihi:
11181160
case X86::BI__builtin_ia32_kshiftlisi:

0 commit comments

Comments
 (0)