@@ -8379,34 +8379,6 @@ void Lowering::WidenSIMD12IfNecessary(GenTreeLclVarCommon* node)
83798379#ifdef FEATURE_SIMD
83808380 if (node->TypeIs (TYP_SIMD12))
83818381 {
8382- // Assumption 1:
8383- // RyuJit backend depends on the assumption that on 64-Bit targets Vector3 size is rounded off
8384- // to TARGET_POINTER_SIZE and hence Vector3 locals on stack can be treated as TYP_SIMD16 for
8385- // reading and writing purposes.
8386- //
8387- // Assumption 2:
8388- // RyuJit backend is making another implicit assumption that Vector3 type args when passed in
8389- // registers or on stack, the upper most 4-bytes will be zero.
8390- //
8391- // For P/Invoke return and Reverse P/Invoke argument passing, native compiler doesn't guarantee
8392- // that upper 4-bytes of a Vector3 type struct is zero initialized and hence assumption 2 is
8393- // invalid.
8394- //
8395- // RyuJIT x64 Windows: arguments are treated as passed by ref and hence read/written just 12
8396- // bytes. In case of Vector3 returns, Caller allocates a zero initialized Vector3 local and
8397- // passes it retBuf arg and Callee method writes only 12 bytes to retBuf. For this reason,
8398- // there is no need to clear upper 4-bytes of Vector3 type args.
8399- //
8400- // RyuJIT x64 Unix: arguments are treated as passed by value and read/writen as if TYP_SIMD16.
8401- // Vector3 return values are returned two return registers and Caller assembles them into a
8402- // single xmm reg. Hence RyuJIT explicitly generates code to clears upper 4-bytes of Vector3
8403- // type args in prolog and Vector3 type return value of a call
8404- //
8405- // RyuJIT x86 Windows: all non-param Vector3 local vars are allocated as 16 bytes. Vector3 arguments
8406- // are pushed as 12 bytes. For return values, a 16-byte local is allocated and the address passed
8407- // as a return buffer pointer. The callee doesn't write the high 4 bytes, and we don't need to clear
8408- // it either.
8409-
84108382 if (comp->lvaMapSimd12ToSimd16 (node->AsLclVarCommon ()->GetLclNum ()))
84118383 {
84128384 JITDUMP (" Mapping TYP_SIMD12 lclvar node to TYP_SIMD16:\n " );
@@ -8706,20 +8678,22 @@ void Lowering::FindInducedParameterRegisterLocals()
87068678 assert (fld->GetLclOffs () <= comp->lvaLclExactSize (fld->GetLclNum ()));
87078679 unsigned structAccessedSize =
87088680 min (genTypeSize (fld), comp->lvaLclExactSize (fld->GetLclNum ()) - fld->GetLclOffs ());
8709- if ((segment. Offset != fld->GetLclOffs ()) || (structAccessedSize > segment.Size ) ||
8710- (varTypeUsesIntReg ( fld) != genIsValidIntReg ( segment.GetRegister ()) ))
8681+ if ((fld->GetLclOffs () < segment.Offset ) ||
8682+ (fld-> GetLclOffs () + structAccessedSize > segment.Offset + segment. Size ))
87118683 {
87128684 continue ;
87138685 }
87148686
8715- // This is a match, but check if it is already remapped.
8716- // TODO-CQ: If it is already remapped, we can reuse the value from
8717- // the remapping.
8718- if (comp-> FindParameterRegisterLocalMappingByRegister (segment. GetRegister ()) == nullptr )
8687+ // TODO-CQ: Float -> !float extractions are not supported
8688+ // TODO-CQ: Float -> float extractions with non-zero offset is not supported
8689+ if ( genIsValidFloatReg (segment. GetRegister ()) &&
8690+ (! varTypeUsesFloatReg (fld) || (fld-> GetLclOffs () != segment. Offset )) )
87198691 {
8720- regSegment = &segment ;
8692+ continue ;
87218693 }
87228694
8695+ // Found a register segment this field is contained in
8696+ regSegment = &segment;
87238697 break ;
87248698 }
87258699
@@ -8728,7 +8702,7 @@ void Lowering::FindInducedParameterRegisterLocals()
87288702 continue ;
87298703 }
87308704
8731- JITDUMP (" LCL_FLD use [%06u] of unenregisterable parameter corresponds to " , Compiler::dspTreeID (fld));
8705+ JITDUMP (" LCL_FLD use [%06u] of unenregisterable parameter is contained in " , Compiler::dspTreeID (fld));
87328706 DBEXEC (VERBOSE, regSegment->Dump ());
87338707 JITDUMP (" \n " );
87348708
@@ -8742,76 +8716,109 @@ void Lowering::FindInducedParameterRegisterLocals()
87428716 continue ;
87438717 }
87448718
8745- unsigned remappedLclNum = TryReuseLocalForParameterAccess (use, storedToLocals);
8719+ const ParameterRegisterLocalMapping* existingMapping =
8720+ comp->FindParameterRegisterLocalMappingByRegister (regSegment->GetRegister ());
87468721
8747- if (remappedLclNum == BAD_VAR_NUM)
8722+ unsigned remappedLclNum = BAD_VAR_NUM;
8723+ if (existingMapping == nullptr )
87488724 {
8749- // If we have seen register kills then avoid creating a new local.
8750- // The local is going to have to move from the parameter register
8751- // into a callee saved register, and the callee saved register will
8752- // need to be saved/restored to/from stack anyway.
8753- if (hasRegisterKill)
8725+ remappedLclNum = comp->lvaGrabTemp (
8726+ false DEBUGARG (comp->printfAlloc (" V%02u.%s" , fld->GetLclNum (), getRegName (regSegment->GetRegister ()))));
8727+
8728+ // We always use the full width for integer registers even if the
8729+ // width is shorter, because various places in the JIT will type
8730+ // accesses larger to generate smaller code.
8731+ var_types registerType =
8732+ genIsValidIntReg (regSegment->GetRegister ()) ? TYP_I_IMPL : regSegment->GetRegisterType ();
8733+ if ((registerType == TYP_I_IMPL) && varTypeIsGC (fld))
87548734 {
8755- JITDUMP (" ..but use happens after a call and is deemed unprofitable to create a local for\n " );
8756- continue ;
8735+ registerType = fld->TypeGet ();
87578736 }
87588737
8759- remappedLclNum = comp->lvaGrabTemp (
8760- false DEBUGARG (comp->printfAlloc (" V%02u.%s" , fld->GetLclNum (), getRegName (regSegment->GetRegister ()))));
8761- comp->lvaGetDesc (remappedLclNum)->lvType = fld->TypeGet ();
8738+ LclVarDsc* varDsc = comp->lvaGetDesc (remappedLclNum);
8739+ varDsc->lvType = genActualType (registerType);
87628740 JITDUMP (" Created new local V%02u for the mapping\n " , remappedLclNum);
8741+
8742+ comp->m_paramRegLocalMappings ->Emplace (regSegment, remappedLclNum, 0 );
8743+ varDsc->lvIsParamRegTarget = true ;
8744+
8745+ JITDUMP (" New mapping: " );
8746+ DBEXEC (VERBOSE, regSegment->Dump ());
8747+ JITDUMP (" -> V%02u\n " , remappedLclNum);
87638748 }
87648749 else
87658750 {
8766- JITDUMP (" Reusing local V%02u for store from struct parameter register %s. Store:\n " , remappedLclNum,
8767- getRegName (regSegment->GetRegister ()));
8768- DISPTREERANGE (LIR::AsRange (comp->fgFirstBB ), use.User ());
8751+ remappedLclNum = existingMapping->LclNum ;
8752+ }
87698753
8770- // The store will be a no-op, so get rid of it
8771- LIR::AsRange (comp->fgFirstBB ).Remove (use.User (), true );
8772- use = LIR::Use ();
8754+ GenTree* value = comp->gtNewLclVarNode (remappedLclNum);
87738755
8774- #ifdef DEBUG
8775- LclVarDsc* remappedLclDsc = comp->lvaGetDesc (remappedLclNum);
8776- remappedLclDsc->lvReason =
8777- comp->printfAlloc (" %s%sV%02u.%s" , remappedLclDsc->lvReason == nullptr ? " " : remappedLclDsc->lvReason ,
8778- remappedLclDsc->lvReason == nullptr ? " " : " " , fld->GetLclNum (),
8779- getRegName (regSegment->GetRegister ()));
8756+ if (varTypeUsesFloatReg (value))
8757+ {
8758+ assert (fld->GetLclOffs () == regSegment->Offset );
8759+
8760+ value->gtType = fld->TypeGet ();
8761+
8762+ #ifdef FEATURE_SIMD
8763+ // SIMD12s should be widened. We cannot do that with
8764+ // WidenSIMD12IfNecessary as it does not expect to see SIMD12
8765+ // accesses of SIMD16 locals here.
8766+ if (value->TypeIs (TYP_SIMD12))
8767+ {
8768+ value->gtType = TYP_SIMD16;
8769+ }
87808770#endif
87818771 }
8772+ else
8773+ {
8774+ var_types registerType = value->TypeGet ();
87828775
8783- comp->m_paramRegLocalMappings ->Emplace (regSegment, remappedLclNum, 0 );
8784- comp->lvaGetDesc (remappedLclNum)->lvIsParamRegTarget = true ;
8776+ if (fld->GetLclOffs () > regSegment->Offset )
8777+ {
8778+ assert (value->TypeIs (TYP_INT, TYP_LONG));
8779+ GenTree* shiftAmount = comp->gtNewIconNode ((fld->GetLclOffs () - regSegment->Offset ) * 8 , TYP_INT);
8780+ value = comp->gtNewOperNode (varTypeIsSmall (fld) && varTypeIsSigned (fld) ? GT_RSH : GT_RSZ,
8781+ value->TypeGet (), value, shiftAmount);
8782+ }
87858783
8786- JITDUMP (" New mapping: " );
8787- DBEXEC (VERBOSE, regSegment->Dump ());
8788- JITDUMP (" -> V%02u\n " , remappedLclNum);
8784+ // Insert explicit normalization for small types (the LCL_FLD we
8785+ // are replacing comes with this normalization). This is only required
8786+ // if we didn't get the normalization via a right shift.
8787+ if (varTypeIsSmall (fld) && (regSegment->Offset + genTypeSize (fld) != genTypeSize (registerType)))
8788+ {
8789+ value = comp->gtNewCastNode (TYP_INT, value, false , fld->TypeGet ());
8790+ }
87898791
8790- // Insert explicit normalization for small types (the LCL_FLD we
8791- // are replacing comes with this normalization).
8792- if (varTypeIsSmall (fld))
8793- {
8794- GenTree* lcl = comp->gtNewLclvNode (remappedLclNum, genActualType (fld));
8795- GenTree* normalizeLcl = comp->gtNewCastNode (TYP_INT, lcl, false , fld->TypeGet ());
8796- GenTree* storeNormalizedLcl = comp->gtNewStoreLclVarNode (remappedLclNum, normalizeLcl);
8797- LIR::AsRange (comp->fgFirstBB ).InsertAtBeginning (LIR::SeqTree (comp, storeNormalizedLcl));
8792+ // If the node is still too large then get it to the right size
8793+ if (genTypeSize (value) != genTypeSize (genActualType ((fld))))
8794+ {
8795+ assert (genTypeSize (value) == 8 );
8796+ assert (genTypeSize (genActualType (fld)) == 4 );
87988797
8799- JITDUMP (" Parameter normalization:\n " );
8800- DISPTREERANGE (LIR::AsRange (comp->fgFirstBB ), storeNormalizedLcl);
8801- }
8798+ if (value->OperIsScalarLocal ())
8799+ {
8800+ // We can use lower bits directly
8801+ value->gtType = TYP_INT;
8802+ }
8803+ else
8804+ {
8805+ value = comp->gtNewCastNode (TYP_INT, value, false , TYP_INT);
8806+ }
8807+ }
88028808
8803- // If we still have a valid use, then replace the LCL_FLD with a
8804- // LCL_VAR of the remapped parameter register local.
8805- if (use.IsInitialized ())
8806- {
8807- GenTree* lcl = comp->gtNewLclvNode (remappedLclNum, genActualType (fld));
8808- LIR::AsRange (comp->fgFirstBB ).InsertAfter (fld, lcl);
8809- use.ReplaceWith (lcl);
8810- LowerNode (lcl);
8811- JITDUMP (" New user tree range:\n " );
8812- DISPTREERANGE (LIR::AsRange (comp->fgFirstBB ), use.User ());
8809+ // Finally insert a bitcast if necessary
8810+ if (value->TypeGet () != genActualType (fld))
8811+ {
8812+ value = comp->gtNewBitCastNode (genActualType (fld), value);
8813+ }
88138814 }
88148815
8816+ // Now replace the LCL_FLD.
8817+ LIR::AsRange (comp->fgFirstBB ).InsertAfter (fld, LIR::SeqTree (comp, value));
8818+ use.ReplaceWith (value);
8819+ JITDUMP (" New user tree range:\n " );
8820+ DISPTREERANGE (LIR::AsRange (comp->fgFirstBB ), use.User ());
8821+
88158822 fld->gtBashToNOP ();
88168823 }
88178824}
0 commit comments