@@ -158,6 +158,48 @@ static mlir::Value emitX86SExtMask(CIRGenFunction &cgf, mlir::Value op,
158158  return  cgf.getBuilder ().createCast (loc, cir::CastKind::integral, mask, dstTy);
159159}
160160
161+ static  mlir::Value emitX86PSRLDQIByteShift (CIRGenFunction &cgf,
162+                                            const  CallExpr *E,
163+                                            ArrayRef<mlir::Value> Ops) {
164+   auto  &builder = cgf.getBuilder ();
165+   auto  resultType = cast<cir::VectorType>(Ops[0 ].getType ());
166+   auto  loc = cgf.getLoc (E->getExprLoc ());
167+   unsigned  shiftVal = getIntValueFromConstOp (Ops[1 ]) & 0xff ;
168+ 
169+   //  If psrldq is shifting the vector more than 15 bytes, emit zero.
170+   if  (shiftVal >= 16 )
171+     return  builder.getZero (loc, resultType); 
172+ 
173+   auto  numElts = resultType.getSize () * 8 ;
174+   assert (numElts % 16  == 0  && " Expected a multiple of 16" 
175+ 
176+   llvm::SmallVector<int64_t , 64 > indices;
177+ 
178+   //  This correlates to the OG CodeGen
179+   //  As stated in the OG, 256/512-bit psrldq operates on 128-bit lanes.
180+   //  So we have to make sure we handle it. 
181+   for  (unsigned  l = 0 ; l < numElts; l += 16 ) {
182+     for  (unsigned  i = 0 ; i < 16 ; ++i) {
183+       unsigned  idx = i + shiftVal;
184+       if  (idx >= 16 )
185+         idx += numElts - 16 ;
186+       indices.push_back (idx + l);
187+     }
188+   }
189+ 
190+   auto  byteVecTy = cir::VectorType::get (builder.getSInt8Ty (), numElts);
191+   mlir::Value byteCast = builder.createBitcast (Ops[0 ], byteVecTy);
192+   mlir::Value zero = builder.getZero (loc, byteVecTy);
193+ 
194+   //  Perform the shuffle (right shift by inserting zeros from the left)
195+   mlir::Value shuffleResult =
196+       builder.createVecShuffle (loc, byteCast, zero, indices);
197+ 
198+   //  Cast back to original type
199+   return  builder.createBitcast (shuffleResult, resultType);
200+ }
201+ 
202+ 
161203mlir::Value CIRGenFunction::emitX86BuiltinExpr (unsigned  BuiltinID,
162204                                               const  CallExpr *E) {
163205  if  (BuiltinID == Builtin::BI__builtin_cpu_is)
@@ -1112,7 +1154,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned BuiltinID,
11121154  case  X86::BI__builtin_ia32_psrldqi128_byteshift:
11131155  case  X86::BI__builtin_ia32_psrldqi256_byteshift:
11141156  case  X86::BI__builtin_ia32_psrldqi512_byteshift:
1115-     llvm_unreachable ( " psrldqi NYI " 
1157+     emitX86PSRLDQIByteShift (* this , E, Ops );
11161158  case  X86::BI__builtin_ia32_kshiftliqi:
11171159  case  X86::BI__builtin_ia32_kshiftlihi:
11181160  case  X86::BI__builtin_ia32_kshiftlisi:
0 commit comments