Skip to content

Commit

Permalink
JIT ARM64-SVE: Add Sve.LoadVector*FirstFaulting APIs (#104964)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikabl-arm authored Aug 8, 2024
1 parent da757a1 commit ca8e63e
Show file tree
Hide file tree
Showing 11 changed files with 458 additions and 33 deletions.
6 changes: 6 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26729,29 +26729,35 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
case NI_Sve_LoadVector:
case NI_Sve_LoadVectorNonTemporal:
case NI_Sve_LoadVector128AndReplicateToVector:
case NI_Sve_LoadVectorByteZeroExtendFirstFaulting:
case NI_Sve_LoadVectorByteZeroExtendToInt16:
case NI_Sve_LoadVectorByteZeroExtendToInt32:
case NI_Sve_LoadVectorByteZeroExtendToInt64:
case NI_Sve_LoadVectorByteZeroExtendToUInt16:
case NI_Sve_LoadVectorByteZeroExtendToUInt32:
case NI_Sve_LoadVectorByteZeroExtendToUInt64:
case NI_Sve_LoadVectorFirstFaulting:
case NI_Sve_LoadVectorInt16SignExtendFirstFaulting:
case NI_Sve_LoadVectorInt16SignExtendToInt32:
case NI_Sve_LoadVectorInt16SignExtendToInt64:
case NI_Sve_LoadVectorInt16SignExtendToUInt32:
case NI_Sve_LoadVectorInt16SignExtendToUInt64:
case NI_Sve_LoadVectorInt32SignExtendFirstFaulting:
case NI_Sve_LoadVectorInt32SignExtendToInt64:
case NI_Sve_LoadVectorInt32SignExtendToUInt64:
case NI_Sve_LoadVectorSByteSignExtendFirstFaulting:
case NI_Sve_LoadVectorSByteSignExtendToInt16:
case NI_Sve_LoadVectorSByteSignExtendToInt32:
case NI_Sve_LoadVectorSByteSignExtendToInt64:
case NI_Sve_LoadVectorSByteSignExtendToUInt16:
case NI_Sve_LoadVectorSByteSignExtendToUInt32:
case NI_Sve_LoadVectorSByteSignExtendToUInt64:
case NI_Sve_LoadVectorUInt16ZeroExtendFirstFaulting:
case NI_Sve_LoadVectorUInt16ZeroExtendToInt32:
case NI_Sve_LoadVectorUInt16ZeroExtendToInt64:
case NI_Sve_LoadVectorUInt16ZeroExtendToUInt32:
case NI_Sve_LoadVectorUInt16ZeroExtendToUInt64:
case NI_Sve_LoadVectorUInt32ZeroExtendFirstFaulting:
case NI_Sve_LoadVectorUInt32ZeroExtendToInt64:
case NI_Sve_LoadVectorUInt32ZeroExtendToUInt64:
case NI_Sve_Load2xVectorAndUnzip:
Expand Down
18 changes: 18 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2392,6 +2392,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
}

case NI_Sve_LoadVectorFirstFaulting:
case NI_Sve_LoadVectorInt16SignExtendFirstFaulting:
case NI_Sve_LoadVectorInt32SignExtendFirstFaulting:
case NI_Sve_LoadVectorUInt16ZeroExtendFirstFaulting:
case NI_Sve_LoadVectorUInt32ZeroExtendFirstFaulting:
{
if (intrin.numOperands == 3)
{
Expand All @@ -2405,6 +2409,20 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Sve_LoadVectorByteZeroExtendFirstFaulting:
case NI_Sve_LoadVectorSByteSignExtendFirstFaulting:
{
if (intrin.numOperands == 3)
{
// We have an extra argument, which means there is a "use" of FFR here. Restore it back into the FFR register.
assert(op3Reg != REG_NA);
GetEmitter()->emitIns_R(INS_sve_wrffr, emitSize, op3Reg, opt);
}

GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, REG_ZR, opt);
break;
}

case NI_Sve_SetFfr:
{
assert(targetReg == REG_NA);
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1825,7 +1825,13 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
StoreFFRValue(node);
break;
}
case NI_Sve_LoadVectorByteZeroExtendFirstFaulting:
case NI_Sve_LoadVectorFirstFaulting:
case NI_Sve_LoadVectorInt16SignExtendFirstFaulting:
case NI_Sve_LoadVectorInt32SignExtendFirstFaulting:
case NI_Sve_LoadVectorSByteSignExtendFirstFaulting:
case NI_Sve_LoadVectorUInt16ZeroExtendFirstFaulting:
case NI_Sve_LoadVectorUInt32ZeroExtendFirstFaulting:
{
LIR::Use use;
bool foundUse = BlockRange().TryGetUse(node, &use);
Expand Down Expand Up @@ -4140,7 +4146,13 @@ void Lowering::StoreFFRValue(GenTreeHWIntrinsic* node)
switch (node->GetHWIntrinsicId())
{
case NI_Sve_GatherVectorFirstFaulting:
case NI_Sve_LoadVectorByteZeroExtendFirstFaulting:
case NI_Sve_LoadVectorFirstFaulting:
case NI_Sve_LoadVectorInt16SignExtendFirstFaulting:
case NI_Sve_LoadVectorInt32SignExtendFirstFaulting:
case NI_Sve_LoadVectorSByteSignExtendFirstFaulting:
case NI_Sve_LoadVectorUInt16ZeroExtendFirstFaulting:
case NI_Sve_LoadVectorUInt32ZeroExtendFirstFaulting:
case NI_Sve_SetFfr:

break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4844,6 +4844,43 @@ internal Arm64() { }
public static unsafe Vector<ulong> LoadVectorByteNonFaultingZeroExtendToUInt64(byte* address) { throw new PlatformNotSupportedException(); }


// Load 8-bit data and zero-extend, first-faulting

/// <summary>
/// svint16_t svldff1ub_s16(svbool_t pg, const uint8_t *base)
/// LDFF1B Zresult.H, Pg/Z, [Xbase, XZR]
/// </summary>
public static unsafe Vector<short> LoadVectorByteZeroExtendFirstFaulting(Vector<short> mask, byte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svldff1ub_s32(svbool_t pg, const uint8_t *base)
/// LDFF1B Zresult.S, Pg/Z, [Xbase, XZR]
/// </summary>
public static unsafe Vector<int> LoadVectorByteZeroExtendFirstFaulting(Vector<int> mask, byte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svldff1ub_s64(svbool_t pg, const uint8_t *base)
/// LDFF1B Zresult.D, Pg/Z, [Xbase, XZR]
/// </summary>
public static unsafe Vector<long> LoadVectorByteZeroExtendFirstFaulting(Vector<long> mask, byte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svldff1ub_u16(svbool_t pg, const uint8_t *base)
/// LDFF1B Zresult.H, Pg/Z, [Xbase, XZR]
/// </summary>
public static unsafe Vector<ushort> LoadVectorByteZeroExtendFirstFaulting(Vector<ushort> mask, byte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svldff1ub_u32(svbool_t pg, const uint8_t *base)
/// LDFF1B Zresult.S, Pg/Z, [Xbase, XZR]
/// </summary>
public static unsafe Vector<uint> LoadVectorByteZeroExtendFirstFaulting(Vector<uint> mask, byte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svldff1ub_u64(svbool_t pg, const uint8_t *base)
/// LDFF1B Zresult.D, Pg/Z, [Xbase, XZR]
/// </summary>
public static unsafe Vector<ulong> LoadVectorByteZeroExtendFirstFaulting(Vector<ulong> mask, byte* address) { throw new PlatformNotSupportedException(); }


// Load 8-bit data and zero-extend

/// <summary>
Expand Down Expand Up @@ -4995,6 +5032,33 @@ internal Arm64() { }
public static unsafe Vector<ulong> LoadVectorInt16NonFaultingSignExtendToUInt64(short* address) { throw new PlatformNotSupportedException(); }


// Load 16-bit data and sign-extend, first-faulting

/// <summary>
/// svint32_t svldff1sh_s32(svbool_t pg, const int16_t *base)
/// LDFF1SH Zresult.S, Pg/Z, [Xbase, XZR, LSL #1]
/// </summary>
public static unsafe Vector<int> LoadVectorInt16SignExtendFirstFaulting(Vector<int> mask, short* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svldff1sh_s64(svbool_t pg, const int16_t *base)
/// LDFF1SH Zresult.D, Pg/Z, [Xbase, XZR, LSL #1]
/// </summary>
public static unsafe Vector<long> LoadVectorInt16SignExtendFirstFaulting(Vector<long> mask, short* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svldff1sh_u32(svbool_t pg, const int16_t *base)
/// LDFF1SH Zresult.S, Pg/Z, [Xbase, XZR, LSL #1]
/// </summary>
public static unsafe Vector<uint> LoadVectorInt16SignExtendFirstFaulting(Vector<uint> mask, short* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svldff1sh_u64(svbool_t pg, const int16_t *base)
/// LDFF1SH Zresult.D, Pg/Z, [Xbase, XZR, LSL #1]
/// </summary>
public static unsafe Vector<ulong> LoadVectorInt16SignExtendFirstFaulting(Vector<ulong> mask, short* address) { throw new PlatformNotSupportedException(); }


// Load 16-bit data and sign-extend

/// <summary>
Expand Down Expand Up @@ -5049,6 +5113,21 @@ internal Arm64() { }
public static unsafe Vector<ulong> LoadVectorInt32NonFaultingSignExtendToUInt64(int* address) { throw new PlatformNotSupportedException(); }


// Load 32-bit data and sign-extend, first-faulting

/// <summary>
/// svint64_t svldff1sw_s64(svbool_t pg, const int32_t *base)
/// LDFF1SW Zresult.D, Pg/Z, [Xbase, XZR, LSL #2]
/// </summary>
public static unsafe Vector<long> LoadVectorInt32SignExtendFirstFaulting(Vector<long> mask, int* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svldff1sw_u64(svbool_t pg, const int32_t *base)
/// LDFF1SW Zresult.D, Pg/Z, [Xbase, XZR, LSL #2]
/// </summary>
public static unsafe Vector<ulong> LoadVectorInt32SignExtendFirstFaulting(Vector<ulong> mask, int* address) { throw new PlatformNotSupportedException(); }


// Load 32-bit data and sign-extend

/// <summary>
Expand Down Expand Up @@ -5247,6 +5326,45 @@ internal Arm64() { }
public static unsafe Vector<ulong> LoadVectorSByteNonFaultingSignExtendToUInt64(sbyte* address) { throw new PlatformNotSupportedException(); }


// Load 8-bit data and sign-extend, first-faulting

/// <summary>
/// svint16_t svldff1sb_s16(svbool_t pg, const int8_t *base)
/// LDFF1SB Zresult.H, Pg/Z, [Xbase, XZR]
/// </summary>
public static unsafe Vector<short> LoadVectorSByteSignExtendFirstFaulting(Vector<short> mask, sbyte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svldff1sb_s32(svbool_t pg, const int8_t *base)
/// LDFF1SB Zresult.S, Pg/Z, [Xbase, XZR]
/// </summary>
public static unsafe Vector<int> LoadVectorSByteSignExtendFirstFaulting(Vector<int> mask, sbyte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svldff1sb_s64(svbool_t pg, const int8_t *base)
/// LDFF1SB Zresult.D, Pg/Z, [Xbase, XZR]
/// </summary>
public static unsafe Vector<long> LoadVectorSByteSignExtendFirstFaulting(Vector<long> mask, sbyte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svldff1sb_u16(svbool_t pg, const int8_t *base)
/// LDFF1SB Zresult.H, Pg/Z, [Xbase, XZR]
/// </summary>
public static unsafe Vector<ushort> LoadVectorSByteSignExtendFirstFaulting(Vector<ushort> mask, sbyte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svldff1sb_u32(svbool_t pg, const int8_t *base)
/// LDFF1SB Zresult.S, Pg/Z, [Xbase, XZR]
/// </summary>
public static unsafe Vector<uint> LoadVectorSByteSignExtendFirstFaulting(Vector<uint> mask, sbyte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svldff1sb_u64(svbool_t pg, const int8_t *base)
/// LDFF1SB Zresult.D, Pg/Z, [Xbase, XZR]
/// </summary>
public static unsafe Vector<ulong> LoadVectorSByteSignExtendFirstFaulting(Vector<ulong> mask, sbyte* address) { throw new PlatformNotSupportedException(); }


// Load 8-bit data and sign-extend

/// <summary>
Expand Down Expand Up @@ -5337,6 +5455,33 @@ internal Arm64() { }
public static unsafe Vector<ulong> LoadVectorUInt16NonFaultingZeroExtendToUInt64(ushort* address) { throw new PlatformNotSupportedException(); }


// Load 16-bit data and zero-extend, first-faulting

/// <summary>
/// svint32_t svldff1uh_s32(svbool_t pg, const uint16_t *base)
/// LDFF1H Zresult.S, Pg/Z, [Xbase, XZR, LSL #1]
/// </summary>
public static unsafe Vector<int> LoadVectorUInt16ZeroExtendFirstFaulting(Vector<int> mask, ushort* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svldff1uh_s64(svbool_t pg, const uint16_t *base)
/// LDFF1H Zresult.D, Pg/Z, [Xbase, XZR, LSL #1]
/// </summary>
public static unsafe Vector<long> LoadVectorUInt16ZeroExtendFirstFaulting(Vector<long> mask, ushort* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svldff1uh_u32(svbool_t pg, const uint16_t *base)
/// LDFF1H Zresult.S, Pg/Z, [Xbase, XZR, LSL #1]
/// </summary>
public static unsafe Vector<uint> LoadVectorUInt16ZeroExtendFirstFaulting(Vector<uint> mask, ushort* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svldff1uh_u64(svbool_t pg, const uint16_t *base)
/// LDFF1H Zresult.D, Pg/Z, [Xbase, XZR, LSL #1]
/// </summary>
public static unsafe Vector<ulong> LoadVectorUInt16ZeroExtendFirstFaulting(Vector<ulong> mask, ushort* address) { throw new PlatformNotSupportedException(); }


// Load 16-bit data and zero-extend

/// <summary>
Expand Down Expand Up @@ -5391,6 +5536,21 @@ internal Arm64() { }
public static unsafe Vector<ulong> LoadVectorUInt32NonFaultingZeroExtendToUInt64(uint* address) { throw new PlatformNotSupportedException(); }


// Load 32-bit data and zero-extend, first-faulting

/// <summary>
/// svint64_t svldff1uw_s64(svbool_t pg, const uint32_t *base)
/// LDFF1W Zresult.D, Pg/Z, [Xbase, XZR, LSL #2]
/// </summary>
public static unsafe Vector<long> LoadVectorUInt32ZeroExtendFirstFaulting(Vector<long> mask, uint* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svldff1uw_u64(svbool_t pg, const uint32_t *base)
/// LDFF1W Zresult.D, Pg/Z, [Xbase, XZR, LSL #2]
/// </summary>
public static unsafe Vector<ulong> LoadVectorUInt32ZeroExtendFirstFaulting(Vector<ulong> mask, uint* address) { throw new PlatformNotSupportedException(); }


// Load 32-bit data and zero-extend

/// <summary>
Expand Down
Loading

0 comments on commit ca8e63e

Please sign in to comment.