From 6b831103f865ac865c9f3d7ccd6f7e23e003c846 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Thu, 9 May 2024 10:06:14 -0700 Subject: [PATCH] Enable AVX512 embedded masking for most other intrinsics (#101886) * Remove HW_Flag_MultiIns in favor of using HW_Flag_SpecialCodeGen * Add a new flag HW_Flag_InvalidNodeId * Change HW_Flag_EmbMaskingIncompatible to be HW_Flag_EmbMaskingCompatible * Mark various compare intrinsics with HW_Flag_NoEvexSemantics * Marking various intrinsics as EmbBroadcastCompatible, EmbMaskingCompatible, or Commutative * Applying formatting patch * Ensure WithLower/WithUpper are not marked as InvalidNodeId * Ensure that instOptions are being passed down all relevant hwintrinsic code paths * Ensure the insOpts are plumbed through for EVEX instructions * Ensure EVEX instructions are properly annotated with EmbeddedBroadcastSupported * Ensure that embedded broadcast/masking is displayed in the disassembly * Applying formatting patch * Updating the hwintrinsic tests to cover embedded broadcast/masking * Fix some handling in the JIT related to embedded broadcast/masking * Fixup some tests where validating embedded masking is non-trivial * Cleanup some cases found by SPMI * Ensure that CompareLessThan has its operands swapped back if its being converted to the AVX512 form * Don't regress a scenario around op_Equality and TYP_MASK * Adjusting hardware intrinsic tests to test non-zero masks * Avoid some messiness around operand swapping * Ensure embedded masks mark TYP_SIMD16 and TYP_SIMD32 instructions as needing EVEX * Mark Sse2_r/Sse2_ro as AotIncompatible due to runtime/102037 --- src/coreclr/jit/codegen.h | 47 +- src/coreclr/jit/codegencommon.cpp | 4 +- src/coreclr/jit/codegenxarch.cpp | 68 +- src/coreclr/jit/compiler.h | 10 + src/coreclr/jit/emit.h | 48 +- src/coreclr/jit/emitxarch.cpp | 832 ++++++---- src/coreclr/jit/emitxarch.h | 254 ++- src/coreclr/jit/gentree.cpp | 56 +- src/coreclr/jit/hwintrinsic.h | 29 +- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 1 - src/coreclr/jit/hwintrinsiccodegenxarch.cpp | 292 ++-- src/coreclr/jit/hwintrinsiclistarm64.h | 378 ++--- src/coreclr/jit/hwintrinsiclistxarch.h | 1378 +++++++++-------- src/coreclr/jit/hwintrinsicxarch.cpp | 147 +- src/coreclr/jit/instr.cpp | 75 +- src/coreclr/jit/instr.h | 4 +- src/coreclr/jit/instrsxarch.h | 438 +++--- src/coreclr/jit/jitconfigvalues.h | 9 +- src/coreclr/jit/lowerxarch.cpp | 301 +++- src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/lsraxarch.cpp | 15 +- src/coreclr/jit/simdcodegenxarch.cpp | 9 +- .../GenerateHWIntrinsicTests_X86.cs | 302 ++-- .../Shared/VectorBinaryOpTest.template | 102 +- .../Shared/VectorBinaryOperatorTest.template | 89 +- .../Shared/VectorTernaryOpTest.template | 105 +- .../General/Shared/VectorUnaryOpTest.template | 97 +- .../Shared/VectorUnaryOperatorTest.template | 89 +- .../X86/Shared/BroadcastTest.template | 283 ++++ .../X86/Shared/ComplexBinOpTest.template | 302 ++++ .../X86/Shared/ComplexTernOpTest.template | 321 ++++ .../X86/Shared/ComplexUnOpTest.template | 283 ++++ .../X86/Shared/_BinaryOpTestTemplate.template | 97 +- .../Shared/_TernaryOpTestTemplate.template | 101 +- .../X86/Shared/_UnaryOpTestTemplate.template | 94 +- .../HardwareIntrinsics/X86/Sse2/Sse2_r.csproj | 2 + .../X86/Sse2/Sse2_ro.csproj | 2 + 37 files changed, 4723 insertions(+), 1943 deletions(-) create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Shared/BroadcastTest.template create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Shared/ComplexBinOpTest.template create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Shared/ComplexTernOpTest.template create mode 100644 src/tests/JIT/HardwareIntrinsics/X86/Shared/ComplexUnOpTest.template diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 2a2ede6bae9ac0..af324ce6e8ba3c 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -959,38 +959,35 @@ class CodeGen final : public CodeGenInterface #ifdef FEATURE_HW_INTRINSICS void genHWIntrinsic(GenTreeHWIntrinsic* node); #if defined(TARGET_XARCH) - void genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, - instruction ins, - emitAttr attr, - regNumber reg, - GenTree* rmOp, - insOpts instOptions = INS_OPTS_NONE); - void genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival); + void genHWIntrinsic_R_RM( + GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, regNumber reg, GenTree* rmOp, insOpts instOptions); + void genHWIntrinsic_R_RM_I( + GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival, insOpts instOptions); void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, insOpts instOptions); - void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival); - void genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr); + void genHWIntrinsic_R_R_RM_I( + GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival, insOpts instOptions); + void genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, insOpts instOptions); void genHWIntrinsic_R_R_R_RM(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTree* op3, - insOpts instOptions = INS_OPTS_NONE); - void genHWIntrinsic_R_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival); + insOpts instOptions); + void genHWIntrinsic_R_R_R_RM_I( + GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival, insOpts instOptions); - void genBaseIntrinsic(GenTreeHWIntrinsic* node); - void genX86BaseIntrinsic(GenTreeHWIntrinsic* node); + void genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); + void genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); void genSSEIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); void genSSE2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); - void genSSE41Intrinsic(GenTreeHWIntrinsic* node); - void genSSE42Intrinsic(GenTreeHWIntrinsic* node); + void genSSE41Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); + void genSSE42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); void genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); - void genAESIntrinsic(GenTreeHWIntrinsic* node); void genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); void genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions); - void genPermuteVar2x(GenTreeHWIntrinsic* node); + void genPermuteVar2x(GenTreeHWIntrinsic* node, insOpts instOptions); void genLZCNTIntrinsic(GenTreeHWIntrinsic* node); - void genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node); void genPOPCNTIntrinsic(GenTreeHWIntrinsic* node); void genXCNTIntrinsic(GenTreeHWIntrinsic* node, instruction ins); void genX86SerializeIntrinsic(GenTreeHWIntrinsic* node); @@ -1003,6 +1000,8 @@ class CodeGen final : public CodeGenInterface HWIntrinsicSwitchCaseBody emitSwCase); void genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* node, GenTree* lastOp); + + static insOpts AddEmbBroadcastMode(insOpts instOptions); #endif // defined(TARGET_XARCH) #ifdef TARGET_ARM64 @@ -1576,7 +1575,7 @@ class CodeGen final : public CodeGenInterface void inst_TT(instruction ins, emitAttr size, GenTree* op1); void inst_RV_TT(instruction ins, emitAttr size, regNumber op1Reg, GenTree* op2); void inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regNumber reg2, unsigned ival); - void inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival); + void inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival, insOpts instOptions); void inst_RV_RV_TT(instruction ins, emitAttr size, regNumber targetReg, @@ -1584,8 +1583,14 @@ class CodeGen final : public CodeGenInterface GenTree* op2, bool isRMW, insOpts instOptions); - void inst_RV_RV_TT_IV( - instruction ins, emitAttr size, regNumber targetReg, regNumber op1Reg, GenTree* op2, int8_t ival, bool isRMW); + void inst_RV_RV_TT_IV(instruction ins, + emitAttr size, + regNumber targetReg, + regNumber op1Reg, + GenTree* op2, + int8_t ival, + bool isRMW, + insOpts instOptions); #endif void inst_set_SV_var(GenTree* tree); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 27fd021ba3d2c2..1271e594777a68 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3856,7 +3856,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& } #elif defined(TARGET_XARCH) // XORPS is the fastest and smallest way to initialize a XMM register to zero. - GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg); + GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg, INS_OPTS_NONE); dblInitReg = reg; #elif defined(TARGET_ARM64) // We will just zero out the entire vector register. This sets it to a double/float zero value @@ -3896,7 +3896,7 @@ void CodeGen::genZeroInitFltRegs(const regMaskTP& initFltRegs, const regMaskTP& } #elif defined(TARGET_XARCH) // XORPS is the fastest and smallest way to initialize a XMM register to zero. - GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg); + GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, reg, reg, reg, INS_OPTS_NONE); fltInitReg = reg; #elif defined(TARGET_ARM64) // We will just zero out the entire vector register. This sets it to a double/float zero value diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index aa8d9426b84db0..7bbb8b5415b01a 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -467,16 +467,16 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t { assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg, - static_cast(0xFF)); + static_cast(0xFF), INS_OPTS_NONE); } else { - emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, EA_16BYTE, targetReg, targetReg, targetReg); + emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, EA_16BYTE, targetReg, targetReg, targetReg, INS_OPTS_NONE); } } else if (val8.IsZero()) { - emit->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, targetReg, targetReg, targetReg); + emit->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, targetReg, targetReg, targetReg, INS_OPTS_NONE); } else { @@ -494,16 +494,16 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t { assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg, - static_cast(0xFF)); + static_cast(0xFF), INS_OPTS_NONE); } else { - emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, EA_16BYTE, targetReg, targetReg, targetReg); + emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, EA_16BYTE, targetReg, targetReg, targetReg, INS_OPTS_NONE); } } else if (val12.IsZero()) { - emit->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, targetReg, targetReg, targetReg); + emit->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, targetReg, targetReg, targetReg, INS_OPTS_NONE); } else { @@ -523,16 +523,16 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t { assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg, - static_cast(0xFF)); + static_cast(0xFF), INS_OPTS_NONE); } else { - emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg); + emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg, INS_OPTS_NONE); } } else if (val16.IsZero()) { - emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg); + emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg, INS_OPTS_NONE); } else { @@ -550,16 +550,16 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t { assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg, - static_cast(0xFF)); + static_cast(0xFF), INS_OPTS_NONE); } else { - emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg); + emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, targetReg, targetReg, targetReg, INS_OPTS_NONE); } } else if (val32.IsZero()) { - emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg); + emit->emitIns_SIMD_R_R_R(INS_xorps, attr, targetReg, targetReg, targetReg, INS_OPTS_NONE); } else { @@ -574,7 +574,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t if (val64.IsAllBitsSet() && compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg, - static_cast(0xFF)); + static_cast(0xFF), INS_OPTS_NONE); } else if (val64.IsZero()) { @@ -584,7 +584,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t // xorps zmm0, zmm0, zmm0 (6 bytes) // xorps ymm0, ymm0, ymm0 (4 bytes) // - emit->emitIns_SIMD_R_R_R(INS_xorps, EA_32BYTE, targetReg, targetReg, targetReg); + emit->emitIns_SIMD_R_R_R(INS_xorps, EA_32BYTE, targetReg, targetReg, targetReg, INS_OPTS_NONE); } else { @@ -661,7 +661,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre if (tree->IsFloatPositiveZero()) { // A faster/smaller way to generate Zero - emit->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, targetReg, targetReg, targetReg); + emit->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, targetReg, targetReg, targetReg, INS_OPTS_NONE); } else if (tree->IsFloatAllBitsSet()) { @@ -669,12 +669,12 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre { assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F)); emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, EA_16BYTE, targetReg, targetReg, targetReg, - static_cast(0xFF)); + static_cast(0xFF), INS_OPTS_NONE); } else { // A faster/smaller way to generate AllBitsSet - emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, EA_16BYTE, targetReg, targetReg, targetReg); + emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, EA_16BYTE, targetReg, targetReg, targetReg, INS_OPTS_NONE); } } else @@ -1366,7 +1366,7 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc) inst_Mov(TYP_SIMD16, reg0, opReg, /* canSkip */ true); // Move upper 64-bits of opReg into reg1 - GetEmitter()->emitIns_SIMD_R_R_R(INS_movhlps, EA_16BYTE, reg1, reg1, opReg); + GetEmitter()->emitIns_SIMD_R_R_R(INS_movhlps, EA_16BYTE, reg1, reg1, opReg, INS_OPTS_NONE); #else // TARGET_X86 assert(src->TypeIs(TYP_SIMD8)); assert(srcIsFloatReg != dstIsFloatReg); @@ -1378,14 +1378,14 @@ void CodeGen::genSIMDSplitReturn(GenTree* src, ReturnTypeDesc* retTypeDesc) // reg1 = opRef[61:32] if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) { - inst_RV_TT_IV(INS_pextrd, EA_4BYTE, reg1, src, 1); + inst_RV_TT_IV(INS_pextrd, EA_4BYTE, reg1, src, 1, INS_OPTS_NONE); } else { bool isRMW = !compiler->canUseVexEncoding(); int8_t shuffleMask = 1; // we only need [61:32]->[31:0], the rest is not read. - inst_RV_RV_TT_IV(INS_pshufd, EA_8BYTE, opReg, opReg, src, shuffleMask, isRMW); + inst_RV_RV_TT_IV(INS_pshufd, EA_8BYTE, opReg, opReg, src, shuffleMask, isRMW, INS_OPTS_NONE); inst_Mov(TYP_INT, reg1, opReg, /* canSkip */ false); } #endif // TARGET_X86 @@ -2294,7 +2294,7 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) if (targetReg != reg1) { - GetEmitter()->emitIns_SIMD_R_R_R(INS_movlhps, EA_16BYTE, targetReg, reg0, reg1); + GetEmitter()->emitIns_SIMD_R_R_R(INS_movlhps, EA_16BYTE, targetReg, reg0, reg1, INS_OPTS_NONE); } else { @@ -2311,8 +2311,8 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) // and next swap low and high 8-bytes of targetReg to have them // rearranged in the right order. - GetEmitter()->emitIns_SIMD_R_R_R(INS_movlhps, EA_16BYTE, targetReg, reg1, reg0); - GetEmitter()->emitIns_SIMD_R_R_R_I(INS_shufpd, EA_16BYTE, targetReg, targetReg, reg1, 0x01); + GetEmitter()->emitIns_SIMD_R_R_R(INS_movlhps, EA_16BYTE, targetReg, reg1, reg0, INS_OPTS_NONE); + GetEmitter()->emitIns_SIMD_R_R_R_I(INS_shufpd, EA_16BYTE, targetReg, targetReg, reg1, 0x01, INS_OPTS_NONE); } genProduceReg(lclNode); #elif defined(TARGET_X86) @@ -2330,14 +2330,14 @@ void CodeGen::genMultiRegStoreToSIMDLocal(GenTreeLclVar* lclNode) const emitAttr size = emitTypeSize(TYP_SIMD8); if (compiler->compOpportunisticallyDependsOn(InstructionSet_SSE41)) { - GetEmitter()->emitIns_SIMD_R_R_R_I(INS_pinsrd, size, targetReg, targetReg, reg1, 1); + GetEmitter()->emitIns_SIMD_R_R_R_I(INS_pinsrd, size, targetReg, targetReg, reg1, 1, INS_OPTS_NONE); } else { regNumber tempXmm = internalRegisters.GetSingle(lclNode); assert(tempXmm != targetReg); inst_Mov(TYP_FLOAT, tempXmm, reg1, /* canSkip */ false); - GetEmitter()->emitIns_SIMD_R_R_R(INS_punpckldq, size, targetReg, targetReg, tempXmm); + GetEmitter()->emitIns_SIMD_R_R_R(INS_punpckldq, size, targetReg, targetReg, tempXmm, INS_OPTS_NONE); } genProduceReg(lclNode); } @@ -7523,7 +7523,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) genConsumeOperands(treeNode->AsOp()); GetEmitter()->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, treeNode->GetRegNum(), treeNode->GetRegNum(), - treeNode->GetRegNum()); + treeNode->GetRegNum(), INS_OPTS_NONE); // Note that here we need to specify srcType that will determine // the size of source reg/mem operand and rex.w prefix. @@ -7568,7 +7568,7 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode) *cns = GetEmitter()->emitFltOrDblConst(d, EA_8BYTE); } - GetEmitter()->emitIns_SIMD_R_R_C(INS_addsd, EA_8BYTE, targetReg, targetReg, *cns, 0); + GetEmitter()->emitIns_SIMD_R_R_C(INS_addsd, EA_8BYTE, targetReg, targetReg, *cns, 0, INS_OPTS_NONE); genDefineTempLabel(label); } @@ -7734,7 +7734,8 @@ void CodeGen::genCkfinite(GenTree* treeNode) if (targetType == TYP_DOUBLE) { inst_Mov(targetType, targetReg, op1->GetRegNum(), /* canSkip */ true); - GetEmitter()->emitIns_SIMD_R_R_R_I(INS_shufps, EA_16BYTE, targetReg, targetReg, targetReg, (int8_t)0xB1); + GetEmitter()->emitIns_SIMD_R_R_R_I(INS_shufps, EA_16BYTE, targetReg, targetReg, targetReg, (int8_t)0xB1, + INS_OPTS_NONE); copyToTmpSrcReg = targetReg; } else @@ -7756,7 +7757,8 @@ void CodeGen::genCkfinite(GenTree* treeNode) if ((targetType == TYP_DOUBLE) && (targetReg == op1->GetRegNum())) { // We need to re-shuffle the targetReg to get the correct result. - GetEmitter()->emitIns_SIMD_R_R_R_I(INS_shufps, EA_16BYTE, targetReg, targetReg, targetReg, (int8_t)0xB1); + GetEmitter()->emitIns_SIMD_R_R_R_I(INS_shufps, EA_16BYTE, targetReg, targetReg, targetReg, (int8_t)0xB1, + INS_OPTS_NONE); } else { @@ -7944,7 +7946,7 @@ void CodeGen::genSSE2BitwiseOp(GenTree* treeNode) #endif } - GetEmitter()->emitIns_SIMD_R_R_C(ins, EA_16BYTE, targetReg, operandReg, *maskFld, 0); + GetEmitter()->emitIns_SIMD_R_R_C(ins, EA_16BYTE, targetReg, operandReg, *maskFld, 0, INS_OPTS_NONE); } //----------------------------------------------------------------------------------------- @@ -8015,7 +8017,7 @@ void CodeGen::genSSE41RoundOp(GenTreeOp* treeNode) } bool isRMW = !compiler->canUseVexEncoding(); - inst_RV_RV_TT_IV(ins, size, dstReg, dstReg, srcNode, ival, isRMW); + inst_RV_RV_TT_IV(ins, size, dstReg, dstReg, srcNode, ival, isRMW, INS_OPTS_NONE); } //--------------------------------------------------------------------- @@ -10993,7 +10995,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // mov qword ptr [ebp/esp-OFFS], rax // // NOTE: it implicitly zeroes YMM4 and ZMM4 as well. - emit->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, zeroSIMDReg, zeroSIMDReg, zeroSIMDReg); + emit->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, zeroSIMDReg, zeroSIMDReg, zeroSIMDReg, INS_OPTS_NONE); int i = 0; if (maxSimdSize > XMM_REGSIZE_BYTES) @@ -11028,7 +11030,7 @@ void CodeGen::genZeroInitFrameUsingBlockInit(int untrLclHi, int untrLclLo, regNu // add rax, 48 ; | // jne SHORT -5 instr ; ---+ - emit->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, zeroSIMDReg, zeroSIMDReg, zeroSIMDReg); + emit->emitIns_SIMD_R_R_R(INS_xorps, EA_16BYTE, zeroSIMDReg, zeroSIMDReg, zeroSIMDReg, INS_OPTS_NONE); // How many extra don't fit into the 3x unroll int extraSimd = (blkSize % (XMM_REGSIZE_BYTES * 3)) / XMM_REGSIZE_BYTES; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 963acd9a3e82ff..88b63c49ff8a43 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9477,6 +9477,16 @@ class Compiler #endif } + bool canUseEmbeddedBroadcast() const + { + return JitConfig.EnableEmbeddedBroadcast(); + } + + bool canUseEmbeddedMasking() const + { + return JitConfig.EnableEmbeddedMasking(); + } + #ifdef TARGET_XARCH public: bool canUseVexEncoding() const diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 3e74bca557263d..7294e87502b187 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -1663,21 +1663,30 @@ class emitter { assert(!idIsEvexbContextSet()); - if (instOptions == INS_OPTS_EVEX_eb_er_rd) + switch (instOptions & INS_OPTS_EVEX_b_MASK) { - _idEvexbContext = 1; - } - else if (instOptions == INS_OPTS_EVEX_er_ru) - { - _idEvexbContext = 2; - } - else if (instOptions == INS_OPTS_EVEX_er_rz) - { - _idEvexbContext = 3; - } - else - { - unreached(); + case INS_OPTS_EVEX_eb_er_rd: + { + _idEvexbContext = 1; + break; + } + + case INS_OPTS_EVEX_er_ru: + { + _idEvexbContext = 2; + break; + } + + case INS_OPTS_EVEX_er_rz: + { + _idEvexbContext = 3; + break; + } + + default: + { + unreached(); + } } } @@ -1686,6 +1695,11 @@ class emitter return _idEvexbContext; } + bool idIsEvexAaaContextSet() const + { + return idGetEvexAaaContext() != 0; + } + unsigned idGetEvexAaaContext() const { assert(IsAvx512OrPriorInstruction(_idIns)); @@ -2289,9 +2303,9 @@ class emitter void emitDispInsAddr(const BYTE* code); void emitDispInsOffs(unsigned offs, bool doffs); void emitDispInsHex(instrDesc* id, BYTE* code, size_t sz); - void emitDispEmbBroadcastCount(instrDesc* id); - void emitDispEmbRounding(instrDesc* id); - void emitDispEmbMasking(instrDesc* id); + void emitDispEmbBroadcastCount(instrDesc* id) const; + void emitDispEmbRounding(instrDesc* id) const; + void emitDispEmbMasking(instrDesc* id) const; void emitDispIns(instrDesc* id, bool isNew, bool doffs, diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 637db7ba08b6f6..9cf8ac5ae87a82 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1226,13 +1226,9 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const return true; } - if (HasEmbeddedBroadcast(id)) + if (HasEmbeddedBroadcast(id) || HasEmbeddedMask(id)) { // Requires the EVEX encoding due to embedded functionality - // - // TODO-XArch-AVX512: This needs to return true when the id includes: - // * embedded rounding control - // * other EVEX specific functionality return true; } @@ -1369,7 +1365,7 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt { case IF_RWR_RRD_ARD_RRD: { - assert(id->idGetEvexAaaContext() == 0); + assert(!id->idIsEvexAaaContextSet()); CnsVal cnsVal; emitGetInsAmdCns(id, &cnsVal); @@ -1380,7 +1376,7 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt case IF_RWR_RRD_MRD_RRD: { - assert(id->idGetEvexAaaContext() == 0); + assert(!id->idIsEvexAaaContextSet()); CnsVal cnsVal; emitGetInsDcmCns(id, &cnsVal); @@ -1391,7 +1387,7 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt case IF_RWR_RRD_SRD_RRD: { - assert(id->idGetEvexAaaContext() == 0); + assert(!id->idIsEvexAaaContextSet()); CnsVal cnsVal; emitGetInsCns(id, &cnsVal); @@ -1402,7 +1398,7 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt case IF_RWR_RRD_RRD_RRD: { - assert(id->idGetEvexAaaContext() == 0); + assert(!id->idIsEvexAaaContextSet()); maskReg = id->idReg4(); break; } @@ -6606,7 +6602,8 @@ void emitter::emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNum * Add an instruction with two register operands and an integer constant. */ -void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival) +void emitter::emitIns_R_R_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival, insOpts instOptions) { #ifdef TARGET_AMD64 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate @@ -6636,6 +6633,9 @@ void emitter::emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regN code = insCodeRM(ins); } + assert((instOptions & INS_OPTS_EVEX_b_MASK) == 0); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeRR(id, code, ival); id->idCodeSize(sz); @@ -6668,16 +6668,17 @@ void emitter::emitIns_AR(instruction ins, emitAttr attr, regNumber base, int off // and that does not return a value // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op2Reg -- The register of the second operand -// op3Reg -- The register of the third operand -// base -- The base register used for the memory address (first operand) -// offs -- The offset from base +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op2Reg -- The register of the second operand +// op3Reg -- The register of the third operand +// base -- The base register used for the memory address (first operand) +// offs -- The offset from base +// instOptions -- The options that modify how the instruction is generated // void emitter::emitIns_AR_R_R( - instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs) + instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs, insOpts instOptions) { assert(IsAvx512OrPriorInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); @@ -6692,6 +6693,9 @@ void emitter::emitIns_AR_R_R( id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; + assert((instOptions & INS_OPTS_EVEX_b_MASK) == 0); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)); id->idCodeSize(sz); @@ -6699,7 +6703,7 @@ void emitter::emitIns_AR_R_R( emitCurIGsize += sz; } -void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir) +void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insOpts instOptions) { ssize_t offs = indir->Offset(); instrDesc* id = emitNewInstrAmd(attr, offs); @@ -6709,6 +6713,9 @@ void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTre emitHandleMemOp(indir, id, emitInsModeFormat(ins, IF_RRD_ARD), ins); + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); @@ -6716,7 +6723,8 @@ void emitter::emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTre emitCurIGsize += sz; } -void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival) +void emitter::emitIns_R_A_I( + instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival, insOpts instOptions) { noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1)); assert(IsAvx512OrPriorInstruction(ins)); @@ -6740,6 +6748,9 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT code = insCodeRM(ins); } + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeAM(id, code, ival); id->idCodeSize(sz); @@ -6747,8 +6758,13 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT emitCurIGsize += sz; } -void emitter::emitIns_R_C_I( - instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival) +void emitter::emitIns_R_C_I(instruction ins, + emitAttr attr, + regNumber reg1, + CORINFO_FIELD_HANDLE fldHnd, + int offs, + int ival, + insOpts instOptions) { // Static always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) @@ -6777,6 +6793,9 @@ void emitter::emitIns_R_C_I( code = insCodeRM(ins); } + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeCV(id, code, ival); id->idCodeSize(sz); @@ -6784,7 +6803,8 @@ void emitter::emitIns_R_C_I( emitCurIGsize += sz; } -void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival) +void emitter::emitIns_R_S_I( + instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival, insOpts instOptions) { noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1)); assert(IsAvx512OrPriorInstruction(ins)); @@ -6811,6 +6831,9 @@ void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int code = insCodeRM(ins); } + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeSV(id, code, varx, offs, ival); id->idCodeSize(sz); @@ -7029,8 +7052,14 @@ void emitter::emitIns_R_R_S( emitCurIGsize += sz; } -void emitter::emitIns_R_R_A_I( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt) +void emitter::emitIns_R_R_A_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + GenTreeIndir* indir, + int ival, + insFormat fmt, + insOpts instOptions) { assert(IsAvx512OrPriorInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); @@ -7044,6 +7073,9 @@ void emitter::emitIns_R_R_A_I( emitHandleMemOp(indir, id, fmt, ins); + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival); id->idCodeSize(sz); @@ -7074,8 +7106,14 @@ void emitter::emitIns_R_R_AR_I( emitCurIGsize += sz; } -void emitter::emitIns_R_R_C_I( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival) +void emitter::emitIns_R_R_C_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + CORINFO_FIELD_HANDLE fldHnd, + int offs, + int ival, + insOpts instOptions) { assert(IsAvx512OrPriorInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); @@ -7094,6 +7132,9 @@ void emitter::emitIns_R_R_C_I( id->idReg2(reg2); id->idAddr()->iiaFieldHnd = fldHnd; + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival); id->idCodeSize(sz); @@ -7105,16 +7146,17 @@ void emitter::emitIns_R_R_C_I( * emitIns_R_R_R_I: Add an instruction with three register operands and an immediate. * * Arguments: - * ins - the instruction to add - * attr - the emitter attribute for instruction - * targetReg - the target (destination) register - * reg1 - the first source register - * reg2 - the second source register - * ival - the immediate value + * ins - the instruction to add + * attr - the emitter attribute for instruction + * targetReg - the target (destination) register + * reg1 - the first source register + * reg2 - the second source register + * ival - the immediate value + * instOptions - the options that modify how the instruction is generated */ void emitter::emitIns_R_R_R_I( - instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, int ival) + instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, int ival, insOpts instOptions) { assert(IsAvx512OrPriorInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); @@ -7126,6 +7168,9 @@ void emitter::emitIns_R_R_R_I( id->idReg2(reg1); id->idReg3(reg2); + assert((instOptions & INS_OPTS_EVEX_b_MASK) == 0); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeRR(id, insCodeRM(ins), ival); id->idCodeSize(sz); @@ -7134,7 +7179,7 @@ void emitter::emitIns_R_R_R_I( } void emitter::emitIns_R_R_S_I( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival) + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival, insOpts instOptions) { assert(IsAvx512OrPriorInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); @@ -7147,6 +7192,9 @@ void emitter::emitIns_R_R_S_I( id->idReg2(reg2); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + #ifdef DEBUG id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; #endif @@ -7163,18 +7211,24 @@ void emitter::emitIns_R_R_S_I( // another register operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op3Reg -- The register of the third operand -// indir -- The GenTreeIndir used for the memory address +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op3Reg -- The register of the third operand +// indir -- The GenTreeIndir used for the memory address +// instOptions -- The options that modify how the instruction is generated // // Remarks: // op2 is built from indir // -void emitter::emitIns_R_R_A_R( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir) +void emitter::emitIns_R_R_A_R(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op3Reg, + GenTreeIndir* indir, + insOpts instOptions) { assert(isAvxBlendv(ins) || isAvx512Blendv(ins)); assert(UseSimdEncoding()); @@ -7189,6 +7243,9 @@ void emitter::emitIns_R_R_A_R( emitHandleMemOp(indir, id, IF_RWR_RRD_ARD_RRD, ins); + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival); id->idCodeSize(sz); @@ -7201,13 +7258,14 @@ void emitter::emitIns_R_R_A_R( // offset, another register operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op3Reg -- The register of the third operand -// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address -// offs -- The offset added to the memory address from fldHnd +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op3Reg -- The register of the third operand +// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address +// offs -- The offset added to the memory address from fldHnd +// instOptions -- The options that modify how the instruction is generated // // Remarks: // op2 is built from fldHnd + offs @@ -7218,7 +7276,8 @@ void emitter::emitIns_R_R_C_R(instruction ins, regNumber op1Reg, regNumber op3Reg, CORINFO_FIELD_HANDLE fldHnd, - int offs) + int offs, + insOpts instOptions) { assert(isAvxBlendv(ins) || isAvx512Blendv(ins)); assert(UseSimdEncoding()); @@ -7239,6 +7298,9 @@ void emitter::emitIns_R_R_C_R(instruction ins, id->idInsFmt(IF_RWR_RRD_MRD_RRD); id->idAddr()->iiaFieldHnd = fldHnd; + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival); id->idCodeSize(sz); @@ -7251,19 +7313,26 @@ void emitter::emitIns_R_R_C_R(instruction ins, // offset, another register operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op3Reg -- The register of the third operand -// varx -- The variable index used for the memory address -// offs -- The offset added to the memory address from varx +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op3Reg -- The register of the third operand +// varx -- The variable index used for the memory address +// offs -- The offset added to the memory address from varx +// instOptions -- The options that modify how the instruction is generated // // Remarks: // op2 is built from varx + offs // -void emitter::emitIns_R_R_S_R( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs) +void emitter::emitIns_R_R_S_R(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op3Reg, + int varx, + int offs, + insOpts instOptions) { assert(isAvxBlendv(ins) || isAvx512Blendv(ins)); assert(UseSimdEncoding()); @@ -7278,6 +7347,9 @@ void emitter::emitIns_R_R_S_R( id->idInsFmt(IF_RWR_RRD_SRD_RRD); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival); id->idCodeSize(sz); @@ -7285,8 +7357,13 @@ void emitter::emitIns_R_R_S_R( emitCurIGsize += sz; } -void emitter::emitIns_R_R_R_R( - instruction ins, emitAttr attr, regNumber targetReg, regNumber reg1, regNumber reg2, regNumber reg3) +void emitter::emitIns_R_R_R_R(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber reg1, + regNumber reg2, + regNumber reg3, + insOpts instOptions) { assert(isAvxBlendv(ins) || isAvx512Blendv(ins)); assert(UseSimdEncoding()); @@ -7300,6 +7377,9 @@ void emitter::emitIns_R_R_R_R( id->idReg3(reg2); id->idReg4(reg3); + assert((instOptions & INS_OPTS_EVEX_b_MASK) == 0); + SetEvexEmbMaskIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeRR(id, insCodeRM(ins)); if (!isMaskReg(reg3)) @@ -7318,7 +7398,8 @@ void emitter::emitIns_R_R_R_R( * * Add an instruction with a register + static member operands. */ -void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs) +void emitter::emitIns_R_C( + instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs, insOpts instOptions) { // Static always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) @@ -7384,6 +7465,9 @@ void emitter::emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO } } + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + id->idCodeSize(sz); id->idAddr()->iiaFieldHnd = fldHnd; @@ -8223,11 +8307,12 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu // and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// ival -- The immediate value +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// ival -- The immediate value +// instOptions -- The options that modify how the instruction is generated // // Notes: // This will handle the required register copy if 'op1Reg' and 'targetReg' are not the same, and @@ -8236,14 +8321,16 @@ void emitter::emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNu // where we frequently need to handle the case of generating 3-operand or 2-operand forms // depending on what target ISA is supported. // -void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival) +void emitter::emitIns_SIMD_R_R_I( + instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival, insOpts instOptions) { if (UseSimdEncoding() || IsDstSrcImmAvxInstruction(ins)) { - emitIns_R_R_I(ins, attr, targetReg, op1Reg, ival); + emitIns_R_R_I(ins, attr, targetReg, op1Reg, ival, instOptions); } else { + assert(instOptions == INS_OPTS_NONE); emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_I(ins, attr, targetReg, ival); } @@ -8254,11 +8341,12 @@ void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targe // and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// indir -- The GenTreeIndir used for the memory address +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// indir -- The GenTreeIndir used for the memory address +// instOptions -- The options that modify how the instruction is generated // void emitter::emitIns_SIMD_R_R_A( instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir, insOpts instOptions) @@ -8280,12 +8368,13 @@ void emitter::emitIns_SIMD_R_R_A( // and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address -// offs -- The offset added to the memory address from fldHnd +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address +// offs -- The offset added to the memory address from fldHnd +// instOptions -- The options that modify how the instruction is generated // void emitter::emitIns_SIMD_R_R_C(instruction ins, emitAttr attr, @@ -8312,11 +8401,12 @@ void emitter::emitIns_SIMD_R_R_C(instruction ins, // value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op2Reg -- The register of the second operand +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op2Reg -- The register of the second operand +// instOptions -- The options that modify how the instruction is generated // void emitter::emitIns_SIMD_R_R_R( instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, insOpts instOptions) @@ -8327,6 +8417,8 @@ void emitter::emitIns_SIMD_R_R_R( } else { + assert(instOptions == INS_OPTS_NONE); + // Ensure we aren't overwriting op2 assert((op2Reg != targetReg) || (op1Reg == targetReg)); @@ -8348,12 +8440,13 @@ void emitter::emitIns_SIMD_R_R_R( // and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// varx -- The variable index used for the memory address -// offs -- The offset added to the memory address from varx +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// varx -- The variable index used for the memory address +// offs -- The offset added to the memory address from varx +// instOptions -- The options that modify how the instruction is generated // void emitter::emitIns_SIMD_R_R_S( instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs, insOpts instOptions) @@ -8375,22 +8468,29 @@ void emitter::emitIns_SIMD_R_R_S( // an immediate operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// indir -- The GenTreeIndir used for the memory address -// ival -- The immediate value -// -void emitter::emitIns_SIMD_R_R_A_I( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir, int ival) +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// indir -- The GenTreeIndir used for the memory address +// ival -- The immediate value +// instOptions -- The options that modify how the instruction is generated +// +void emitter::emitIns_SIMD_R_R_A_I(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + GenTreeIndir* indir, + int ival, + insOpts instOptions) { if (UseSimdEncoding()) { - emitIns_R_R_A_I(ins, attr, targetReg, op1Reg, indir, ival, IF_RWR_RRD_ARD_CNS); + emitIns_R_R_A_I(ins, attr, targetReg, op1Reg, indir, ival, IF_RWR_RRD_ARD_CNS, instOptions); } else { + assert(instOptions == INS_OPTS_NONE); emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_A_I(ins, attr, targetReg, indir, ival); } @@ -8401,13 +8501,14 @@ void emitter::emitIns_SIMD_R_R_A_I( // an immediate operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address -// offs -- The offset added to the memory address from fldHnd -// ival -- The immediate value +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address +// offs -- The offset added to the memory address from fldHnd +// ival -- The immediate value +// instOptions -- The options that modify how the instruction is generated // void emitter::emitIns_SIMD_R_R_C_I(instruction ins, emitAttr attr, @@ -8415,14 +8516,16 @@ void emitter::emitIns_SIMD_R_R_C_I(instruction ins, regNumber op1Reg, CORINFO_FIELD_HANDLE fldHnd, int offs, - int ival) + int ival, + insOpts instOptions) { if (UseSimdEncoding()) { - emitIns_R_R_C_I(ins, attr, targetReg, op1Reg, fldHnd, offs, ival); + emitIns_R_R_C_I(ins, attr, targetReg, op1Reg, fldHnd, offs, ival, instOptions); } else { + assert(instOptions == INS_OPTS_NONE); emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_C_I(ins, attr, targetReg, fldHnd, offs, ival); } @@ -8433,22 +8536,30 @@ void emitter::emitIns_SIMD_R_R_C_I(instruction ins, // and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op2Reg -- The register of the second operand -// ival -- The immediate value -// -void emitter::emitIns_SIMD_R_R_R_I( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int ival) +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op2Reg -- The register of the second operand +// ival -- The immediate value +// instOptions -- The options that modify how the instruction is generated +// +void emitter::emitIns_SIMD_R_R_R_I(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op2Reg, + int ival, + insOpts instOptions) { if (UseSimdEncoding()) { - emitIns_R_R_R_I(ins, attr, targetReg, op1Reg, op2Reg, ival); + emitIns_R_R_R_I(ins, attr, targetReg, op1Reg, op2Reg, ival, instOptions); } else { + assert(instOptions == INS_OPTS_NONE); + // Ensure we aren't overwriting op2 assert((op2Reg != targetReg) || (op1Reg == targetReg)); @@ -8462,23 +8573,31 @@ void emitter::emitIns_SIMD_R_R_R_I( // an immediate operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// varx -- The variable index used for the memory address -// offs -- The offset added to the memory address from varx -// ival -- The immediate value -// -void emitter::emitIns_SIMD_R_R_S_I( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs, int ival) +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// varx -- The variable index used for the memory address +// offs -- The offset added to the memory address from varx +// ival -- The immediate value +// instOptions -- The options that modify how the instruction is generated +// +void emitter::emitIns_SIMD_R_R_S_I(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + int varx, + int offs, + int ival, + insOpts instOptions) { if (UseSimdEncoding()) { - emitIns_R_R_S_I(ins, attr, targetReg, op1Reg, varx, offs, ival); + emitIns_R_R_S_I(ins, attr, targetReg, op1Reg, varx, offs, ival, instOptions); } else { + assert(instOptions == INS_OPTS_NONE); emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); emitIns_R_S_I(ins, attr, targetReg, varx, offs, ival); } @@ -8490,15 +8609,21 @@ void emitter::emitIns_SIMD_R_R_S_I( // and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op2Reg -- The register of the second operand -// indir -- The GenTreeIndir used for the memory address -// -void emitter::emitIns_SIMD_R_R_R_A( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir) +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op2Reg -- The register of the second operand +// indir -- The GenTreeIndir used for the memory address +// instOptions -- The options that modify how the instruction is generated +// +void emitter::emitIns_SIMD_R_R_R_A(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op2Reg, + GenTreeIndir* indir, + insOpts instOptions) { assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins)); assert(UseSimdEncoding()); @@ -8507,7 +8632,7 @@ void emitter::emitIns_SIMD_R_R_R_A( assert((op2Reg != targetReg) || (op1Reg == targetReg)); emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); - emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir); + emitIns_R_R_A(ins, attr, targetReg, op2Reg, indir, instOptions); } //------------------------------------------------------------------------ @@ -8515,13 +8640,14 @@ void emitter::emitIns_SIMD_R_R_R_A( // offset, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op2Reg -- The register of the second operand -// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address -// offs -- The offset added to the memory address from fldHnd +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op2Reg -- The register of the second operand +// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address +// offs -- The offset added to the memory address from fldHnd +// instOptions -- The options that modify how the instruction is generated // void emitter::emitIns_SIMD_R_R_R_C(instruction ins, emitAttr attr, @@ -8529,7 +8655,8 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins, regNumber op1Reg, regNumber op2Reg, CORINFO_FIELD_HANDLE fldHnd, - int offs) + int offs, + insOpts instOptions) { assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins)); assert(UseSimdEncoding()); @@ -8538,7 +8665,7 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins, assert((op2Reg != targetReg) || (op1Reg == targetReg)); emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); - emitIns_R_R_C(ins, attr, targetReg, op2Reg, fldHnd, offs); + emitIns_R_R_C(ins, attr, targetReg, op2Reg, fldHnd, offs, instOptions); } //------------------------------------------------------------------------ @@ -8546,13 +8673,13 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction ins, // value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op2Reg -- The register of the second operand -// op3Reg -- The register of the second operand -// instOptions - The options that modify how the instruction is generated +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op2Reg -- The register of the second operand +// op3Reg -- The register of the second operand +// instOptions -- The options that modify how the instruction is generated // void emitter::emitIns_SIMD_R_R_R_R(instruction ins, emitAttr attr, @@ -8598,11 +8725,12 @@ void emitter::emitIns_SIMD_R_R_R_R(instruction ins, default: break; } - emitIns_R_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, op3Reg); + emitIns_R_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, op3Reg, instOptions); } else { assert(isSse41Blendv(ins)); + assert(instOptions == INS_OPTS_NONE); // Ensure we aren't overwriting op1 or op2 assert((op1Reg != REG_XMM0) || (op3Reg == REG_XMM0)); @@ -8630,16 +8758,23 @@ void emitter::emitIns_SIMD_R_R_R_R(instruction ins, // offset, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op2Reg -- The register of the second operand -// varx -- The variable index used for the memory address -// offs -- The offset added to the memory address from varx -// -void emitter::emitIns_SIMD_R_R_R_S( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs) +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op2Reg -- The register of the second operand +// varx -- The variable index used for the memory address +// offs -- The offset added to the memory address from varx +// instOptions -- The options that modify how the instruction is generated +// +void emitter::emitIns_SIMD_R_R_R_S(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op2Reg, + int varx, + int offs, + insOpts instOptions) { assert(IsFMAInstruction(ins) || IsPermuteVar2xInstruction(ins) || IsAVXVNNIInstruction(ins)); assert(UseSimdEncoding()); @@ -8648,7 +8783,7 @@ void emitter::emitIns_SIMD_R_R_R_S( assert((op2Reg != targetReg) || (op1Reg == targetReg)); emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); - emitIns_R_R_S(ins, attr, targetReg, op2Reg, varx, offs); + emitIns_R_R_S(ins, attr, targetReg, op2Reg, varx, offs, instOptions); } //------------------------------------------------------------------------ @@ -8656,15 +8791,21 @@ void emitter::emitIns_SIMD_R_R_R_S( // another register operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op3Reg -- The register of the third operand -// indir -- The GenTreeIndir used for the memory address -// -void emitter::emitIns_SIMD_R_R_A_R( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir) +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op3Reg -- The register of the third operand +// indir -- The GenTreeIndir used for the memory address +// instOptions -- The options that modify how the instruction is generated +// +void emitter::emitIns_SIMD_R_R_A_R(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op3Reg, + GenTreeIndir* indir, + insOpts instOptions) { if (UseSimdEncoding()) { @@ -8697,11 +8838,12 @@ void emitter::emitIns_SIMD_R_R_A_R( } } - emitIns_R_R_A_R(ins, attr, targetReg, op1Reg, op3Reg, indir); + emitIns_R_R_A_R(ins, attr, targetReg, op1Reg, op3Reg, indir, instOptions); } else { assert(isSse41Blendv(ins)); + assert(instOptions == INS_OPTS_NONE); // Ensure we aren't overwriting op1 assert(op1Reg != REG_XMM0); @@ -8722,13 +8864,14 @@ void emitter::emitIns_SIMD_R_R_A_R( // offset, another register operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op3Reg -- The register of the third operand -// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address -// offs -- The offset added to the memory address from fldHnd +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op3Reg -- The register of the third operand +// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address +// offs -- The offset added to the memory address from fldHnd +// instOptions -- The options that modify how the instruction is generated // void emitter::emitIns_SIMD_R_R_C_R(instruction ins, emitAttr attr, @@ -8736,7 +8879,8 @@ void emitter::emitIns_SIMD_R_R_C_R(instruction ins, regNumber op1Reg, regNumber op3Reg, CORINFO_FIELD_HANDLE fldHnd, - int offs) + int offs, + insOpts instOptions) { if (UseSimdEncoding()) { @@ -8769,11 +8913,12 @@ void emitter::emitIns_SIMD_R_R_C_R(instruction ins, } } - emitIns_R_R_C_R(ins, attr, targetReg, op1Reg, op3Reg, fldHnd, offs); + emitIns_R_R_C_R(ins, attr, targetReg, op1Reg, op3Reg, fldHnd, offs, instOptions); } else { assert(isSse41Blendv(ins)); + assert(instOptions == INS_OPTS_NONE); // Ensure we aren't overwriting op1 assert(op1Reg != REG_XMM0); @@ -8794,16 +8939,23 @@ void emitter::emitIns_SIMD_R_R_C_R(instruction ins, // offset, another register operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op3Reg -- The register of the third operand -// varx -- The variable index used for the memory address -// offs -- The offset added to the memory address from varx -// -void emitter::emitIns_SIMD_R_R_S_R( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs) +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op3Reg -- The register of the third operand +// varx -- The variable index used for the memory address +// offs -- The offset added to the memory address from varx +// instOptions -- The options that modify how the instruction is generated +// +void emitter::emitIns_SIMD_R_R_S_R(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op3Reg, + int varx, + int offs, + insOpts instOptions) { if (UseSimdEncoding()) { @@ -8836,11 +8988,12 @@ void emitter::emitIns_SIMD_R_R_S_R( } } - emitIns_R_R_S_R(ins, attr, targetReg, op1Reg, op3Reg, varx, offs); + emitIns_R_R_S_R(ins, attr, targetReg, op1Reg, op3Reg, varx, offs, instOptions); } else { assert(isSse41Blendv(ins)); + assert(instOptions == INS_OPTS_NONE); // Ensure we aren't overwriting op1 assert(op1Reg != REG_XMM0); @@ -8861,13 +9014,14 @@ void emitter::emitIns_SIMD_R_R_S_R( // address, an immediate operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op2Reg -- The register of the second operand -// indir -- The GenTreeIndir used for the memory address -// ival -- The immediate value +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op2Reg -- The register of the second operand +// indir -- The GenTreeIndir used for the memory address +// ival -- The immediate value +// instOptions -- The options that modify how the instruction is generated // void emitter::emitIns_SIMD_R_R_R_A_I(instruction ins, emitAttr attr, @@ -8875,11 +9029,12 @@ void emitter::emitIns_SIMD_R_R_R_A_I(instruction ins, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir, - int ival) + int ival, + insOpts instOptions) { assert(UseSimdEncoding()); emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); - emitIns_R_R_A_I(ins, attr, targetReg, op2Reg, indir, ival, IF_RWR_RRD_ARD_CNS); + emitIns_R_R_A_I(ins, attr, targetReg, op2Reg, indir, ival, IF_RWR_RRD_ARD_CNS, instOptions); } //------------------------------------------------------------------------ @@ -8887,14 +9042,15 @@ void emitter::emitIns_SIMD_R_R_R_A_I(instruction ins, // offset, an immediate operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op2Reg -- The register of the second operand -// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address -// offs -- The offset added to the memory address from fldHnd -// ival -- The immediate value +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op2Reg -- The register of the second operand +// fldHnd -- The CORINFO_FIELD_HANDLE used for the memory address +// offs -- The offset added to the memory address from fldHnd +// ival -- The immediate value +// instOptions -- The options that modify how the instruction is generated // void emitter::emitIns_SIMD_R_R_R_C_I(instruction ins, emitAttr attr, @@ -8903,11 +9059,12 @@ void emitter::emitIns_SIMD_R_R_R_C_I(instruction ins, regNumber op2Reg, CORINFO_FIELD_HANDLE fldHnd, int offs, - int ival) + int ival, + insOpts instOptions) { assert(UseSimdEncoding()); emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); - emitIns_R_R_C_I(ins, attr, targetReg, op2Reg, fldHnd, offs, ival); + emitIns_R_R_C_I(ins, attr, targetReg, op2Reg, fldHnd, offs, ival, instOptions); } //------------------------------------------------------------------------ @@ -8915,20 +9072,27 @@ void emitter::emitIns_SIMD_R_R_R_C_I(instruction ins, // operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op2Reg -- The register of the second operand -// op3Reg -- The register of the third operand -// ival -- The immediate value -// -void emitter::emitIns_SIMD_R_R_R_R_I( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg, int ival) +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op2Reg -- The register of the second operand +// op3Reg -- The register of the third operand +// ival -- The immediate value +// instOptions -- The options that modify how the instruction is generated +// +void emitter::emitIns_SIMD_R_R_R_R_I(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op2Reg, + regNumber op3Reg, + int ival, + insOpts instOptions) { assert(UseSimdEncoding()); emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); - emitIns_R_R_R_I(ins, attr, targetReg, op2Reg, op3Reg, ival); + emitIns_R_R_R_I(ins, attr, targetReg, op2Reg, op3Reg, ival, instOptions); } //------------------------------------------------------------------------ @@ -8936,14 +9100,15 @@ void emitter::emitIns_SIMD_R_R_R_R_I( // offset, an immediate operand, and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// targetReg -- The target register -// op1Reg -- The register of the first operand -// op2Reg -- The register of the second operand -// varx -- The variable index used for the memory address -// offs -- The offset added to the memory address from varx -// ival -- The immediate value +// ins -- The instruction being emitted +// attr -- The emit attribute +// targetReg -- The target register +// op1Reg -- The register of the first operand +// op2Reg -- The register of the second operand +// varx -- The variable index used for the memory address +// offs -- The offset added to the memory address from varx +// ival -- The immediate value +// instOptions -- The options that modify how the instruction is generated // void emitter::emitIns_SIMD_R_R_R_S_I(instruction ins, emitAttr attr, @@ -8952,11 +9117,12 @@ void emitter::emitIns_SIMD_R_R_R_S_I(instruction ins, regNumber op2Reg, int varx, int offs, - int ival) + int ival, + insOpts instOptions) { assert(UseSimdEncoding()); emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); - emitIns_R_R_S_I(ins, attr, targetReg, op2Reg, varx, offs, ival); + emitIns_R_R_S_I(ins, attr, targetReg, op2Reg, varx, offs, ival, instOptions); } #endif // FEATURE_HW_INTRINSICS @@ -9105,7 +9271,7 @@ void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int va emitCurIGsize += sz; } -void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs) +void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs, insOpts instOptions) { emitAttr size = EA_SIZE(attr); noway_assert(emitVerifyEncodable(ins, size, ireg)); @@ -9123,6 +9289,9 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int va id->idReg1(ireg); id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs); + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs); id->idCodeSize(sz); #ifdef DEBUG @@ -10485,11 +10654,14 @@ void emitter::emitDispFrameRef(int varx, int disp, int offs, bool asmfm) * * Display the mask for the instruction */ -void emitter::emitDispMask(const instrDesc* id, regNumber reg, emitAttr size) const +void emitter::emitDispMask(const instrDesc* id, regNumber reg) const { - printf("{%s}", emitRegName(reg, size)); - // TODO: Handle printing {z} if EVEX.z is set - printf(", "); + printf(" {%s}", emitRegName(reg)); + + if (id->idIsEvexZContextSet()) + { + printf("{z}"); + } } /***************************************************************************** @@ -10793,9 +10965,9 @@ void emitter::emitDispInsHex(instrDesc* id, BYTE* code, size_t sz) // Arguments: // id - The instruction descriptor // -void emitter::emitDispEmbBroadcastCount(instrDesc* id) +void emitter::emitDispEmbBroadcastCount(instrDesc* id) const { - if (!id->idIsEvexbContextSet()) + if (!IsEvexEncodableInstruction(id->idIns()) || !id->idIsEvexbContextSet()) { return; } @@ -10809,7 +10981,7 @@ void emitter::emitDispEmbBroadcastCount(instrDesc* id) // Arguments: // id - The instruction descriptor // -void emitter::emitDispEmbRounding(instrDesc* id) +void emitter::emitDispEmbRounding(instrDesc* id) const { if (!id->idIsEvexbContextSet()) { @@ -10840,21 +11012,21 @@ void emitter::emitDispEmbRounding(instrDesc* id) // Arguments: // id - The instruction descriptor // -void emitter::emitDispEmbMasking(instrDesc* id) +void emitter::emitDispEmbMasking(instrDesc* id) const { - regNumber maskReg = static_cast(id->idGetEvexAaaContext() + KBASE); - - if (maskReg == REG_K0) + if (!IsEvexEncodableInstruction(id->idIns())) { return; } - printf(" {%s}", emitRegName(maskReg)); + regNumber maskReg = static_cast(id->idGetEvexAaaContext() + KBASE); - if (id->idIsEvexZContextSet()) + if (maskReg == REG_K0) { - printf(" {z}"); + return; } + + emitDispMask(id, maskReg); } //-------------------------------------------------------------------- @@ -11062,6 +11234,8 @@ void emitter::emitDispIns( case IF_CNS: { + assert(!IsEvexEncodableInstruction(id->idIns())); + val = emitGetInsSC(id); #ifdef TARGET_AMD64 // no 8-byte immediates allowed here! @@ -11105,6 +11279,8 @@ void emitter::emitDispIns( case IF_AWR: case IF_ARW: { + assert(!IsEvexEncodableInstruction(id->idIns())); + if (((ins == INS_call) || (ins == INS_tail_i_jmp)) && id->idIsCallRegPtr()) { printf("%s", emitRegName(id->idAddr()->iiaAddrMode.amBaseReg)); @@ -11168,8 +11344,11 @@ void emitter::emitDispIns( // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx` attr = EA_4BYTE; } - printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s", sstr); emitDispAddrMode(id); + emitDispEmbBroadcastCount(id); emitDispCommentForHandle(id->idDebugOnlyInfo()->idMemCookie, 0, id->idDebugOnlyInfo()->idFlags); break; @@ -11179,8 +11358,11 @@ void emitter::emitDispIns( case IF_RRW_ARD_CNS: case IF_RWR_ARD_CNS: { - printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s", sstr); emitDispAddrMode(id); + emitDispEmbBroadcastCount(id); emitGetInsAmdCns(id, &cnsVal); val = cnsVal.cnsVal; @@ -11232,6 +11414,7 @@ void emitter::emitDispIns( printf(sstr); emitDispAddrMode(id); + emitDispEmbMasking(id); printf(", %s", emitRegName(id->idReg1(), attr)); emitGetInsAmdCns(id, &cnsVal); @@ -11273,16 +11456,22 @@ void emitter::emitDispIns( attr = EA_16BYTE; } sstr = codeGen->genSizeStr(EA_ATTR(4)); - printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s", sstr); emitDispAddrMode(id); + emitDispEmbBroadcastCount(id); printf(", %s", emitRegName(id->idReg2(), attr)); break; } case IF_RWR_RRD_ARD_CNS: { - printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); emitDispAddrMode(id); + emitDispEmbBroadcastCount(id); emitGetInsAmdCns(id, &cnsVal); val = cnsVal.cnsVal; @@ -11302,7 +11491,8 @@ void emitter::emitDispIns( case IF_RWR_RRD_ARD_RRD: { - printf("%s ", emitRegName(id->idReg1(), attr)); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); emitGetInsAmdCns(id, &cnsVal); regNumber op3Reg = decodeRegFromIval(cnsVal.cnsVal); @@ -11310,11 +11500,12 @@ void emitter::emitDispIns( if (hasMaskReg) { - emitDispMask(id, op3Reg, attr); + emitDispMask(id, op3Reg); } - printf("%s, %s", emitRegName(id->idReg2(), attr), sstr); + printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); emitDispAddrMode(id); + emitDispEmbBroadcastCount(id); if (!hasMaskReg) { @@ -11330,6 +11521,7 @@ void emitter::emitDispIns( { printf("%s", sstr); emitDispAddrMode(id); + emitDispEmbMasking(id); printf(", %s", emitRegName(id->idReg1(), attr)); break; } @@ -11338,6 +11530,7 @@ void emitter::emitDispIns( { printf("%s", sstr); emitDispAddrMode(id); + emitDispEmbMasking(id); printf(", %s", emitRegName(id->idReg1(), attr)); printf(", %s", emitRegName(id->idReg2(), attr)); break; @@ -11350,6 +11543,7 @@ void emitter::emitDispIns( { printf("%s", sstr); emitDispAddrMode(id); + emitDispEmbMasking(id); emitGetInsAmdCns(id, &cnsVal); val = cnsVal.cnsVal; #ifdef TARGET_AMD64 @@ -11379,6 +11573,7 @@ void emitter::emitDispIns( case IF_SWR: case IF_SRW: { + assert(!IsEvexEncodableInstruction(id->idIns())); printf("%s", sstr); #if !FEATURE_FIXED_OUT_ARGS @@ -11407,6 +11602,7 @@ void emitter::emitDispIns( emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + emitDispEmbMasking(id); printf(", %s", emitRegName(id->idReg1(), attr)); break; @@ -11421,6 +11617,7 @@ void emitter::emitDispIns( emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + emitDispEmbMasking(id); emitGetInsCns(id, &cnsVal); val = cnsVal.cnsVal; @@ -11458,6 +11655,7 @@ void emitter::emitDispIns( emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + emitDispEmbMasking(id); printf(", %s", emitRegName(id->idReg1(), attr)); @@ -11497,9 +11695,12 @@ void emitter::emitDispIns( attr = EA_4BYTE; } - printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s", sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + emitDispEmbBroadcastCount(id); break; } @@ -11508,9 +11709,12 @@ void emitter::emitDispIns( case IF_RWR_SRD_CNS: case IF_RRW_SRD_CNS: { - printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s", sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + emitDispEmbBroadcastCount(id); emitGetInsCns(id, &cnsVal); val = cnsVal.cnsVal; @@ -11543,9 +11747,12 @@ void emitter::emitDispIns( case IF_RWR_RRD_SRD_CNS: { - printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + emitDispEmbBroadcastCount(id); emitGetInsCns(id, &cnsVal); val = cnsVal.cnsVal; @@ -11564,7 +11771,8 @@ void emitter::emitDispIns( case IF_RWR_RRD_SRD_RRD: { - printf("%s ", emitRegName(id->idReg1(), attr)); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); emitGetInsCns(id, &cnsVal); regNumber op3Reg = decodeRegFromIval(cnsVal.cnsVal); @@ -11572,12 +11780,13 @@ void emitter::emitDispIns( if (hasMaskReg) { - emitDispMask(id, op3Reg, attr); + emitDispMask(id, op3Reg); } - printf("%s, %s", emitRegName(id->idReg2(), attr), sstr); + printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + emitDispEmbBroadcastCount(id); if (!hasMaskReg) { @@ -11590,9 +11799,13 @@ void emitter::emitDispIns( case IF_RWR_SRD_RRD: case IF_RRW_SRD_RRD: { - printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s", sstr); + emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + emitDispEmbBroadcastCount(id); printf(", %s", emitRegName(id->idReg2(), attr)); break; } @@ -11602,6 +11815,7 @@ void emitter::emitDispIns( printf("%s", sstr); emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(), id->idDebugOnlyInfo()->idVarRefOffs, asmfm); + emitDispEmbMasking(id); printf(", %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr)); break; } @@ -11615,6 +11829,7 @@ void emitter::emitDispIns( { case INS_pmovmskb: { + assert(!id->idIsEvexAaaContextSet()); printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr)); break; } @@ -11624,7 +11839,8 @@ void emitter::emitDispIns( case INS_cvtsi2ss64: case INS_cvtsi2sd64: { - printf(" %s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr)); + assert(!id->idIsEvexAaaContextSet()); + printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr)); break; } @@ -11641,7 +11857,8 @@ void emitter::emitDispIns( case INS_vcvttss2usi32: case INS_vcvttss2usi64: { - printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); + assert(!id->idIsEvexAaaContextSet()); + printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE)); break; } @@ -11686,19 +11903,25 @@ void emitter::emitDispIns( case INS_vpbroadcastd_gpr: case INS_vpbroadcastw_gpr: { - printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_4BYTE)); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s", emitRegName(id->idReg2(), EA_4BYTE)); break; } case INS_vpbroadcastq_gpr: { - printf(" %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_8BYTE)); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s", emitRegName(id->idReg2(), EA_8BYTE)); break; } default: { - printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr)); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s", emitRegName(id->idReg2(), attr)); emitDispEmbRounding(id); break; } @@ -11713,8 +11936,10 @@ void emitter::emitDispIns( { assert(IsVexOrEvexEncodableInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins) || IsKInstruction(ins)); + regNumber reg2 = id->idReg2(); regNumber reg3 = id->idReg3(); + if (ins == INS_bextr || ins == INS_bzhi #ifdef TARGET_AMD64 || ins == INS_shrx || ins == INS_shlx || ins == INS_sarx @@ -11739,8 +11964,10 @@ void emitter::emitDispIns( { assert(IsVexOrEvexEncodableInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); - printf("%s, ", emitRegName(id->idReg1(), attr)); - printf("%s, ", emitRegName(id->idReg2(), attr)); + + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s", emitRegName(id->idReg2(), attr)); switch (ins) { @@ -11782,7 +12009,7 @@ void emitter::emitDispIns( } } - printf("%s, ", emitRegName(id->idReg3(), attr)); + printf(", %s, ", emitRegName(id->idReg3(), attr)); val = emitGetInsSC(id); goto PRINT_CONSTANT; break; @@ -11793,18 +12020,17 @@ void emitter::emitDispIns( assert(IsVexOrEvexEncodableInstruction(ins)); assert(UseVEXEncoding()); - printf("%s ", emitRegName(id->idReg1(), attr)); + printf("%s", emitRegName(id->idReg1(), attr)); regNumber op4Reg = id->idReg4(); bool hasMaskReg = isMaskReg(op4Reg); if (hasMaskReg) { - emitDispMask(id, op4Reg, attr); + emitDispMask(id, op4Reg); } - printf("%s, ", emitRegName(id->idReg2(), attr)); - printf("%s", emitRegName(id->idReg3(), attr)); + printf(", %s, %s", emitRegName(id->idReg2(), attr), emitRegName(id->idReg3(), attr)); if (!hasMaskReg) { @@ -11875,8 +12101,9 @@ void emitter::emitDispIns( } } - printf("%s,", emitRegName(id->idReg1(), tgtAttr)); - printf(" %s", emitRegName(id->idReg2(), attr)); + printf("%s", emitRegName(id->idReg1(), tgtAttr)); + emitDispEmbMasking(id); + printf(", %s", emitRegName(id->idReg2(), attr)); val = emitGetInsSC(id); #ifdef TARGET_AMD64 // no 8-byte immediates allowed here! @@ -11898,6 +12125,7 @@ void emitter::emitDispIns( case IF_RWR: case IF_RRW: { + assert(!IsEvexEncodableInstruction(id->idIns())); printf("%s", emitRegName(id->idReg1(), attr)); emitDispShift(ins); break; @@ -11909,6 +12137,7 @@ void emitter::emitDispIns( case IF_RRW_SHF: { printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); emitGetInsCns(id, &cnsVal); val = cnsVal.cnsVal; @@ -11953,9 +12182,12 @@ void emitter::emitDispIns( // This logic ensures that we print `crc32 eax, bx` instead of `crc32 ax, bx` attr = EA_4BYTE; } - printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s", sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); + emitDispEmbBroadcastCount(id); break; } @@ -11963,9 +12195,12 @@ void emitter::emitDispIns( case IF_RWR_MRD_CNS: case IF_RRW_MRD_CNS: { - printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s", sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); + emitDispEmbBroadcastCount(id); emitGetInsDcmCns(id, &cnsVal); val = cnsVal.cnsVal; @@ -12017,6 +12252,7 @@ void emitter::emitDispIns( printf(sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); + emitDispEmbMasking(id); printf(", %s", emitRegName(id->idReg1(), attr)); emitGetInsDcmCns(id, &cnsVal); @@ -12051,9 +12287,12 @@ void emitter::emitDispIns( case IF_RWR_RRD_MRD_CNS: { - printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); + emitDispEmbBroadcastCount(id); emitGetInsDcmCns(id, &cnsVal); val = cnsVal.cnsVal; @@ -12072,7 +12311,7 @@ void emitter::emitDispIns( case IF_RWR_RRD_MRD_RRD: { - printf("%s ", emitRegName(id->idReg1(), attr)); + printf("%s", emitRegName(id->idReg1(), attr)); emitGetInsDcmCns(id, &cnsVal); regNumber op3Reg = decodeRegFromIval(cnsVal.cnsVal); @@ -12080,12 +12319,13 @@ void emitter::emitDispIns( if (hasMaskReg) { - emitDispMask(id, op3Reg, attr); + emitDispMask(id, op3Reg); } - printf("%s, %s", emitRegName(id->idReg2(), attr), sstr); + printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); + emitDispEmbBroadcastCount(id); if (!hasMaskReg) { @@ -12096,6 +12336,7 @@ void emitter::emitDispIns( case IF_RWR_MRD_OFF: { + assert(!IsEvexEncodableInstruction(id->idIns())); printf("%s, %s", emitRegName(id->idReg1(), attr), "offset"); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); @@ -12110,6 +12351,7 @@ void emitter::emitDispIns( printf("%s", sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); + emitDispEmbMasking(id); printf(", %s", emitRegName(id->idReg1(), attr)); break; } @@ -12122,6 +12364,7 @@ void emitter::emitDispIns( printf("%s", sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); + emitDispEmbMasking(id); emitGetInsDcmCns(id, &cnsVal); val = cnsVal.cnsVal; #ifdef TARGET_AMD64 @@ -12148,6 +12391,7 @@ void emitter::emitDispIns( case IF_MWR: case IF_MRW: { + assert(!IsEvexEncodableInstruction(id->idIns())); printf("%s", sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); @@ -12157,6 +12401,7 @@ void emitter::emitDispIns( case IF_MRD_OFF: { + assert(!IsEvexEncodableInstruction(id->idIns())); printf("offset "); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); @@ -12167,9 +12412,12 @@ void emitter::emitDispIns( case IF_RWR_MRD_RRD: case IF_RRW_MRD_RRD: { - printf("%s, %s", emitRegName(id->idReg1(), attr), sstr); + printf("%s", emitRegName(id->idReg1(), attr)); + emitDispEmbMasking(id); + printf(", %s, %s", emitRegName(id->idReg2(), attr), sstr); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); + emitDispEmbBroadcastCount(id); printf(", %s", emitRegName(id->idReg2(), attr)); break; } @@ -12177,6 +12425,7 @@ void emitter::emitDispIns( case IF_MWR_RRD_RRD: { printf("%s", sstr); + emitDispEmbMasking(id); offs = emitGetInsDsp(id); emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC); printf(", %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr)); @@ -12187,6 +12436,8 @@ void emitter::emitDispIns( case IF_RWR_LABEL: case IF_SWR_LABEL: { + assert(!IsEvexEncodableInstruction(id->idIns())); + if (ins == INS_lea) { printf("%s, ", emitRegName(id->idReg1(), attr)); @@ -12228,6 +12479,7 @@ void emitter::emitDispIns( case IF_METHOD: case IF_METHPTR: { + assert(!IsEvexEncodableInstruction(id->idIns())); methodName = emitComp->eeGetMethodFullName((CORINFO_METHOD_HANDLE)id->idDebugOnlyInfo()->idMemCookie); if (id->idInsFmt() == IF_METHPTR) diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index e32cab66254fe8..f075e1d1e081b7 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -514,7 +514,7 @@ bool isPrefetch(instruction ins) /* Debug-only routines to display instructions */ /************************************************************************/ -void emitDispMask(const instrDesc* id, regNumber reg, emitAttr size) const; +void emitDispMask(const instrDesc* id, regNumber reg) const; void emitDispReloc(ssize_t value); void emitDispAddrMode(instrDesc* id, bool noDetail = false); void emitDispShift(instruction ins, int cnt = 0); @@ -637,19 +637,35 @@ void emitIns_Mov(instruction ins, emitAttr attr, regNumber dstReg, regNumber srg void emitIns_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, insOpts instOptions = INS_OPTS_NONE); -void emitIns_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival); +void emitIns_R_R_I( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int ival, insOpts instOptions = INS_OPTS_NONE); void emitIns_AR(instruction ins, emitAttr attr, regNumber base, int offs); -void emitIns_AR_R_R(instruction ins, emitAttr attr, regNumber op2Reg, regNumber op3Reg, regNumber base, int offs); +void emitIns_AR_R_R(instruction ins, + emitAttr attr, + regNumber op2Reg, + regNumber op3Reg, + regNumber base, + int offs, + insOpts instOptions = INS_OPTS_NONE); -void emitIns_R_A(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir); +void emitIns_R_A( + instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, insOpts instOptions = INS_OPTS_NONE); -void emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival); +void emitIns_R_A_I( + instruction ins, emitAttr attr, regNumber reg1, GenTreeIndir* indir, int ival, insOpts instOptions = INS_OPTS_NONE); -void emitIns_R_C_I(instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival); +void emitIns_R_C_I(instruction ins, + emitAttr attr, + regNumber reg1, + CORINFO_FIELD_HANDLE fldHnd, + int offs, + int ival, + insOpts instOptions = INS_OPTS_NONE); -void emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival); +void emitIns_R_S_I( + instruction ins, emitAttr attr, regNumber reg1, int varx, int offs, int ival, insOpts instOptions = INS_OPTS_NONE); void emitIns_R_R_A(instruction ins, emitAttr attr, @@ -692,8 +708,14 @@ void emitIns_R_R_R(instruction ins, regNumber reg3, insOpts instOptions = INS_OPTS_NONE); -void emitIns_R_R_A_I( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt); +void emitIns_R_R_A_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + GenTreeIndir* indir, + int ival, + insFormat fmt, + insOpts instOptions = INS_OPTS_NONE); void emitIns_R_R_AR_I( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, int ival); @@ -701,15 +723,39 @@ void emitIns_C_R_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, void emitIns_S_R_I(instruction ins, emitAttr attr, int varNum, int offs, regNumber reg, int ival); void emitIns_A_R_I(instruction ins, emitAttr attr, GenTreeIndir* indir, regNumber reg, int imm); -void emitIns_R_R_C_I( - instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival); - -void emitIns_R_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, int ival); - -void emitIns_R_R_S_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival); - -void emitIns_R_R_A_R( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir); +void emitIns_R_R_C_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + CORINFO_FIELD_HANDLE fldHnd, + int offs, + int ival, + insOpts instOptions = INS_OPTS_NONE); + +void emitIns_R_R_R_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + int ival, + insOpts instOptions = INS_OPTS_NONE); + +void emitIns_R_R_S_I(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + int varx, + int offs, + int ival, + insOpts instOptions = INS_OPTS_NONE); + +void emitIns_R_R_A_R(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op3Reg, + GenTreeIndir* indir, + insOpts instOptions); void emitIns_R_R_C_R(instruction ins, emitAttr attr, @@ -717,22 +763,41 @@ void emitIns_R_R_C_R(instruction ins, regNumber op1Reg, regNumber op3Reg, CORINFO_FIELD_HANDLE fldHnd, - int offs); - -void emitIns_R_R_S_R( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs); - -void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4); + int offs, + insOpts instOptions = INS_OPTS_NONE); + +void emitIns_R_R_S_R(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op3Reg, + int varx, + int offs, + insOpts instOptions = INS_OPTS_NONE); + +void emitIns_R_R_R_R(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber reg3, + regNumber reg4, + insOpts instOptions = INS_OPTS_NONE); void emitIns_S(instruction ins, emitAttr attr, int varx, int offs); void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); -void emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs); +void emitIns_R_S( + instruction ins, emitAttr attr, regNumber ireg, int varx, int offs, insOpts instOptions = INS_OPTS_NONE); void emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, int val); -void emitIns_R_C(instruction ins, emitAttr attr, regNumber reg, CORINFO_FIELD_HANDLE fldHnd, int offs); +void emitIns_R_C(instruction ins, + emitAttr attr, + regNumber reg, + CORINFO_FIELD_HANDLE fldHnd, + int offs, + insOpts instOptions = INS_OPTS_NONE); void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs); @@ -786,80 +851,109 @@ void emitIns_R_AX(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, void emitIns_AX_R(instruction ins, emitAttr attr, regNumber ireg, regNumber reg, unsigned mul, int disp); -void emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival); +void emitIns_SIMD_R_R_I( + instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int ival, insOpts instOptions); -void emitIns_SIMD_R_R_A(instruction ins, - emitAttr attr, - regNumber targetReg, - regNumber op1Reg, - GenTreeIndir* indir, - insOpts instOptions = INS_OPTS_NONE); +void emitIns_SIMD_R_R_A( + instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir, insOpts instOptions); void emitIns_SIMD_R_R_C(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, CORINFO_FIELD_HANDLE fldHnd, int offs, - insOpts instOptions = INS_OPTS_NONE); -void emitIns_SIMD_R_R_R(instruction ins, - emitAttr attr, - regNumber targetReg, - regNumber op1Reg, - regNumber op2Reg, - insOpts instOptions = INS_OPTS_NONE); -void emitIns_SIMD_R_R_S(instruction ins, - emitAttr attr, - regNumber targetReg, - regNumber op1Reg, - int varx, - int offs, - insOpts instOptions = INS_OPTS_NONE); - -void emitIns_SIMD_R_R_A_I( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, GenTreeIndir* indir, int ival); + insOpts instOptions); +void emitIns_SIMD_R_R_R( + instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, insOpts instOptions); +void emitIns_SIMD_R_R_S( + instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs, insOpts instOptions); + +void emitIns_SIMD_R_R_A_I(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + GenTreeIndir* indir, + int ival, + insOpts instOptions); void emitIns_SIMD_R_R_C_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, CORINFO_FIELD_HANDLE fldHnd, int offs, - int ival); -void emitIns_SIMD_R_R_R_I( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int ival); -void emitIns_SIMD_R_R_S_I( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, int varx, int offs, int ival); + int ival, + insOpts instOptions); +void emitIns_SIMD_R_R_R_I(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op2Reg, + int ival, + insOpts instOptions); +void emitIns_SIMD_R_R_S_I(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + int varx, + int offs, + int ival, + insOpts instOptions); #ifdef FEATURE_HW_INTRINSICS -void emitIns_SIMD_R_R_R_A( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir); +void emitIns_SIMD_R_R_R_A(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op2Reg, + GenTreeIndir* indir, + insOpts instOptions); void emitIns_SIMD_R_R_R_C(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, CORINFO_FIELD_HANDLE fldHnd, - int offs); + int offs, + insOpts instOptions); void emitIns_SIMD_R_R_R_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg, - insOpts instOptions = INS_OPTS_NONE); -void emitIns_SIMD_R_R_R_S( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs); - -void emitIns_SIMD_R_R_A_R( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir); + insOpts instOptions); +void emitIns_SIMD_R_R_R_S(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op2Reg, + int varx, + int offs, + insOpts instOptions); + +void emitIns_SIMD_R_R_A_R(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op2Reg, + GenTreeIndir* indir, + insOpts instOptions); void emitIns_SIMD_R_R_C_R(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, CORINFO_FIELD_HANDLE fldHnd, - int offs); -void emitIns_SIMD_R_R_S_R( - instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs); + int offs, + insOpts instOptions); +void emitIns_SIMD_R_R_S_R(instruction ins, + emitAttr attr, + regNumber targetReg, + regNumber op1Reg, + regNumber op2Reg, + int varx, + int offs, + insOpts instOptions); void emitIns_SIMD_R_R_R_A_I(instruction ins, emitAttr attr, @@ -867,7 +961,8 @@ void emitIns_SIMD_R_R_R_A_I(instruction ins, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir, - int ival); + int ival, + insOpts instOptions); void emitIns_SIMD_R_R_R_C_I(instruction ins, emitAttr attr, regNumber targetReg, @@ -875,14 +970,16 @@ void emitIns_SIMD_R_R_R_C_I(instruction ins, regNumber op2Reg, CORINFO_FIELD_HANDLE fldHnd, int offs, - int ival); + int ival, + insOpts instOptions); void emitIns_SIMD_R_R_R_R_I(instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg, - int ival); + int ival, + insOpts instOptions); void emitIns_SIMD_R_R_R_S_I(instruction ins, emitAttr attr, regNumber targetReg, @@ -890,7 +987,8 @@ void emitIns_SIMD_R_R_R_S_I(instruction ins, regNumber op2Reg, int varx, int offs, - int ival); + int ival, + insOpts instOptions); #endif // FEATURE_HW_INTRINSICS enum EmitCallType @@ -973,6 +1071,20 @@ inline bool HasEmbeddedBroadcast(const instrDesc* id) const return id->idIsEvexbContextSet(); } +//------------------------------------------------------------------------ +// HasEmbeddedBroadcast: Do we consider embedded broadcast while encoding. +// +// Arguments: +// id - Instruction descriptor. +// +// Returns: +// `true` if the instruction does embedded broadcast. +// +inline bool HasEmbeddedMask(const instrDesc* id) const +{ + return id->idIsEvexAaaContextSet(); +} + inline bool HasHighSIMDReg(const instrDesc* id) const; inline bool HasMaskReg(const instrDesc* id) const; diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index d48fc45868ba22..b71aa661229f42 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -19737,13 +19737,6 @@ bool GenTree::isContainableHWIntrinsic() const return true; } - case NI_Vector128_get_Zero: - case NI_Vector256_get_Zero: - { - // These HWIntrinsic operations are contained as part of Sse41.Insert - return true; - } - case NI_SSE3_MoveAndDuplicate: case NI_AVX_BroadcastScalarToVector128: case NI_AVX2_BroadcastScalarToVector128: @@ -19901,10 +19894,7 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp) // bool GenTree::isEvexCompatibleHWIntrinsic() const { - // TODO-XARCH-AVX512 remove the ReturnsPerElementMask check once K registers have been properly - // implemented in the register allocator - return OperIsHWIntrinsic() && HWIntrinsicInfo::HasEvexSemantics(AsHWIntrinsic()->GetHWIntrinsicId()) && - !HWIntrinsicInfo::ReturnsPerElementMask(AsHWIntrinsic()->GetHWIntrinsicId()); + return OperIsHWIntrinsic() && HWIntrinsicInfo::HasEvexSemantics(AsHWIntrinsic()->GetHWIntrinsicId()); } //------------------------------------------------------------------------ @@ -19919,10 +19909,25 @@ bool GenTree::isEmbeddedMaskingCompatibleHWIntrinsic() const if (OperIsHWIntrinsic()) { #if defined(TARGET_XARCH) - // TODO-AVX512F-CQ: Expand this to the full set of APIs and make it table driven - // using IsEmbMaskingCompatible. For now, however, limit it to some explicit ids - // for prototyping purposes. - return (AsHWIntrinsic()->GetHWIntrinsicId() == NI_AVX512F_Add); + NamedIntrinsic intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId(); + var_types simdBaseType = AsHWIntrinsic()->GetSimdBaseType(); + + switch (intrinsicId) + { + case NI_AVX512F_ConvertToVector256Int32: + case NI_AVX512F_ConvertToVector256UInt32: + case NI_AVX512F_ConvertToVector512Int32: + case NI_AVX512F_ConvertToVector512UInt32: + case NI_AVX512F_VL_ConvertToVector128UInt32: + { + return varTypeIsFloating(simdBaseType); + } + + default: + { + return HWIntrinsicInfo::IsEmbMaskingCompatible(intrinsicId); + } + } #elif defined(TARGET_ARM64) return HWIntrinsicInfo::IsEmbeddedMaskedOperation(AsHWIntrinsic()->GetHWIntrinsicId()) || HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(AsHWIntrinsic()->GetHWIntrinsicId()); @@ -26737,7 +26742,23 @@ bool GenTreeHWIntrinsic::OperIsMemoryStoreOrBarrier() const bool GenTreeHWIntrinsic::OperIsEmbBroadcastCompatible() const { #if defined(TARGET_XARCH) - return HWIntrinsicInfo::IsEmbBroadcastCompatible(GetHWIntrinsicId()); + NamedIntrinsic intrinsicId = GetHWIntrinsicId(); + var_types simdBaseType = GetSimdBaseType(); + + switch (intrinsicId) + { + case NI_AVX512F_ConvertToVector256Int32: + case NI_AVX512F_ConvertToVector256UInt32: + case NI_AVX512F_VL_ConvertToVector128UInt32: + { + return varTypeIsFloating(simdBaseType); + } + + default: + { + return !varTypeIsSmall(simdBaseType) && HWIntrinsicInfo::IsEmbBroadcastCompatible(intrinsicId); + } + } #else return false; #endif // TARGET_XARCH @@ -27056,6 +27077,9 @@ void GenTreeHWIntrinsic::SetHWIntrinsicId(NamedIntrinsic intrinsicId) // We'll choose to trust the programmer here. assert((oldOperandCount == static_cast(newOperandCount)) || newCountUnknown); + + // All ids being set must be valid + assert(!HWIntrinsicInfo::IsInvalidNodeId(intrinsicId)); #endif // DEBUG gtHWIntrinsicId = intrinsicId; diff --git a/src/coreclr/jit/hwintrinsic.h b/src/coreclr/jit/hwintrinsic.h index 82cf179c742f4f..227941fa15f725 100644 --- a/src/coreclr/jit/hwintrinsic.h +++ b/src/coreclr/jit/hwintrinsic.h @@ -82,9 +82,8 @@ enum HWIntrinsicFlag : unsigned int // - should be transformed in the compiler front-end, cannot reach CodeGen HW_Flag_NoCodeGen = 0x2, - // Multi-instruction - // - that one intrinsic can generate multiple instructions - HW_Flag_MultiIns = 0x4, + // The intrinsic is invalid as the ID of a gtNode + HW_Flag_InvalidNodeId = 0x4, // Select base type using the first argument type HW_Flag_BaseTypeFromFirstArg = 0x8, @@ -229,8 +228,8 @@ enum HWIntrinsicFlag : unsigned int // The intrinsic is an embedded rounding compatible intrinsic HW_Flag_EmbRoundingCompatible = 0x10000000, - // The intrinsic is an embedded masking incompatible intrinsic - HW_Flag_EmbMaskingIncompatible = 0x20000000, + // The intrinsic is an embedded masking compatible intrinsic + HW_Flag_EmbMaskingCompatible = 0x20000000, #elif defined(TARGET_ARM64) // The intrinsic has an enum operand. Using this implies HW_Flag_HasImmediateOperand. @@ -632,7 +631,7 @@ struct HWIntrinsicInfo static bool IsEmbMaskingCompatible(NamedIntrinsic id) { HWIntrinsicFlag flags = lookupFlags(id); - return (flags & HW_Flag_EmbMaskingIncompatible) == 0; + return (flags & HW_Flag_EmbMaskingCompatible) != 0; } #endif // TARGET_XARCH @@ -657,13 +656,7 @@ struct HWIntrinsicInfo static bool RequiresCodegen(NamedIntrinsic id) { HWIntrinsicFlag flags = lookupFlags(id); - return (flags & HW_Flag_NoCodeGen) == 0; - } - - static bool GeneratesMultipleIns(NamedIntrinsic id) - { - HWIntrinsicFlag flags = lookupFlags(id); - return (flags & HW_Flag_MultiIns) != 0; + return (flags & (HW_Flag_NoCodeGen | HW_Flag_InvalidNodeId)) == 0; } static bool SupportsContainment(NamedIntrinsic id) @@ -797,7 +790,13 @@ struct HWIntrinsicInfo static bool HasSpecialImport(NamedIntrinsic id) { HWIntrinsicFlag flags = lookupFlags(id); - return (flags & HW_Flag_SpecialImport) != 0; + return (flags & (HW_Flag_SpecialImport | HW_Flag_InvalidNodeId)) != 0; + } + + static bool IsInvalidNodeId(NamedIntrinsic id) + { + HWIntrinsicFlag flags = lookupFlags(id); + return (flags & HW_Flag_InvalidNodeId) != 0; } static bool IsMultiReg(NamedIntrinsic id) @@ -984,7 +983,7 @@ struct HWIntrinsic final { // TODO-Arm64-Cleanup - make more categories to the table-driven framework bool isTableDrivenCategory = category != HW_Category_Helper; - bool isTableDrivenFlag = !HWIntrinsicInfo::GeneratesMultipleIns(id) && !HWIntrinsicInfo::HasSpecialCodegen(id); + bool isTableDrivenFlag = !HWIntrinsicInfo::HasSpecialCodegen(id); return isTableDrivenCategory && isTableDrivenFlag; } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 7223d7b2203e83..220017b9b8fb01 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -98,7 +98,6 @@ CodeGen::HWIntrinsicImmOpHelper::HWIntrinsicImmOpHelper(CodeGen* codeGen, GenTre // these by // using the same approach as in hwintrinsicxarch.cpp - adding an additional indirection level in form of a // branch table. - assert(!HWIntrinsicInfo::GeneratesMultipleIns(intrin->GetHWIntrinsicId())); branchTargetReg = codeGen->internalRegisters.GetSingle(intrin); } diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp index a4b6748f3db531..8ef9517c344d53 100644 --- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp @@ -74,11 +74,10 @@ static void assertIsContainableHWIntrinsicOp(Lowering* lowering, static bool genIsTableDrivenHWIntrinsic(NamedIntrinsic intrinsicId, HWIntrinsicCategory category) { // TODO - make more categories to the table-driven framework - // HW_Category_Helper and HW_Flag_MultiIns/HW_Flag_SpecialCodeGen usually need manual codegen + // HW_Category_Helper and HW_Flag_SpecialCodeGen usually need manual codegen const bool tableDrivenCategory = (category != HW_Category_Special) && (category != HW_Category_Scalar) && (category != HW_Category_Helper); - const bool tableDrivenFlag = - !HWIntrinsicInfo::GeneratesMultipleIns(intrinsicId) && !HWIntrinsicInfo::HasSpecialCodegen(intrinsicId); + const bool tableDrivenFlag = !HWIntrinsicInfo::HasSpecialCodegen(intrinsicId); return tableDrivenCategory && tableDrivenFlag; } @@ -247,8 +246,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // We don't need to genProduceReg(node) since that will be handled by processing op2 // likewise, processing op2 will ensure its own registers are consumed - // Make sure we consume the registers that are getting specially handled - genConsumeReg(op1); + if (!mergeWithZero) + { + // Make sure we consume the registers that are getting specially handled + genConsumeReg(op1); + } embMaskOp = op3; } } @@ -412,17 +414,17 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { assert(!op1->isContained()); emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op1Reg, - static_cast(ival)); + static_cast(ival), instOptions); } else { - genHWIntrinsic_R_RM_I(node, ins, simdSize, static_cast(ival)); + genHWIntrinsic_R_RM_I(node, ins, simdSize, static_cast(ival), instOptions); } } else if (HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { assert(!op1->isContained()); - emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg); + emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op1Reg, instOptions); } else { @@ -472,7 +474,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (ival != -1) { assert((ival >= 0) && (ival <= 127)); - genHWIntrinsic_R_R_RM_I(node, ins, simdSize, static_cast(ival)); + genHWIntrinsic_R_R_RM_I(node, ins, simdSize, static_cast(ival), instOptions); } else if (category == HW_Category_MemoryLoad) { @@ -502,11 +504,11 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (HWIntrinsicInfo::CopiesUpperBits(intrinsicId)) { assert(!op1->isContained()); - emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op1Reg, i); + emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op1Reg, i, instOptions); } else { - genHWIntrinsic_R_RM_I(node, ins, simdSize, i); + genHWIntrinsic_R_RM_I(node, ins, simdSize, i, instOptions); } }; @@ -531,7 +533,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else if (node->TypeGet() == TYP_VOID) { - genHWIntrinsic_R_RM(node, ins, simdSize, op1Reg, op2); + genHWIntrinsic_R_RM(node, ins, simdSize, op1Reg, op2, instOptions); } else { @@ -560,7 +562,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (HWIntrinsicInfo::isImmOp(intrinsicId, op3)) { auto emitSwCase = [&](int8_t i) { - genHWIntrinsic_R_R_RM_I(node, ins, simdSize, i); + genHWIntrinsic_R_R_RM_I(node, ins, simdSize, i, instOptions); }; if (op3->IsCnsIntOrI()) @@ -581,11 +583,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else if (category == HW_Category_MemoryStore) { + assert(instOptions == INS_OPTS_NONE); + // The Mask instructions do not currently support containment of the address. assert(!op2->isContained()); if (intrinsicId == NI_AVX_MaskStore || intrinsicId == NI_AVX2_MaskStore) { - emit->emitIns_AR_R_R(ins, simdSize, op2Reg, op3Reg, op1Reg, 0); + emit->emitIns_AR_R_R(ins, simdSize, op2Reg, op3Reg, op1Reg, 0, instOptions); } else { @@ -595,7 +599,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI emit->emitIns_Mov(INS_mov, EA_PTRSIZE, REG_EDI, op3Reg, /* canSkip */ true); - emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg); + emit->emitIns_R_R(ins, simdSize, op1Reg, op2Reg, instOptions); } } else @@ -607,7 +611,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX2_BlendVariable: case NI_AVX512F_BlendVariableMask: { - genHWIntrinsic_R_R_RM_R(node, ins, simdSize); + genHWIntrinsic_R_R_RM_R(node, ins, simdSize, instOptions); break; } @@ -618,7 +622,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(op1Reg != REG_NA); assert(op2Reg != REG_NA); - genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3); + genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3, instOptions); break; } @@ -656,7 +660,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) if (HWIntrinsicInfo::isImmOp(intrinsicId, op4)) { auto emitSwCase = [&](int8_t i) { - genHWIntrinsic_R_R_R_RM_I(node, ins, simdSize, i); + genHWIntrinsic_R_R_R_RM_I(node, ins, simdSize, i, instOptions); }; if (op4->IsCnsIntOrI()) @@ -706,28 +710,45 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case InstructionSet_Vector128: case InstructionSet_Vector256: case InstructionSet_Vector512: - genBaseIntrinsic(node); + { + genBaseIntrinsic(node, instOptions); break; + } + case InstructionSet_X86Base: case InstructionSet_X86Base_X64: - genX86BaseIntrinsic(node); + { + genX86BaseIntrinsic(node, instOptions); break; + } + case InstructionSet_SSE: case InstructionSet_SSE_X64: + { genSSEIntrinsic(node, instOptions); break; + } case InstructionSet_SSE2: case InstructionSet_SSE2_X64: + { genSSE2Intrinsic(node, instOptions); break; + } + case InstructionSet_SSE41: case InstructionSet_SSE41_X64: - genSSE41Intrinsic(node); + { + genSSE41Intrinsic(node, instOptions); break; + } + case InstructionSet_SSE42: case InstructionSet_SSE42_X64: - genSSE42Intrinsic(node); + { + genSSE42Intrinsic(node, instOptions); break; + } + case InstructionSet_AVX: case InstructionSet_AVX2: case InstructionSet_AVX512F: @@ -737,35 +758,49 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case InstructionSet_AVX512BW_VL: case InstructionSet_AVX512VBMI: case InstructionSet_AVX512VBMI_VL: + { genAvxFamilyIntrinsic(node, instOptions); break; - case InstructionSet_AES: - genAESIntrinsic(node); - break; + } + case InstructionSet_BMI1: case InstructionSet_BMI1_X64: case InstructionSet_BMI2: case InstructionSet_BMI2_X64: + { genBMI1OrBMI2Intrinsic(node, instOptions); break; + } + case InstructionSet_FMA: + { genFMAIntrinsic(node, instOptions); break; + } + case InstructionSet_LZCNT: case InstructionSet_LZCNT_X64: + { + assert(instOptions == INS_OPTS_NONE); genLZCNTIntrinsic(node); break; - case InstructionSet_PCLMULQDQ: - genPCLMULQDQIntrinsic(node); - break; + } + case InstructionSet_POPCNT: case InstructionSet_POPCNT_X64: + { + assert(instOptions == INS_OPTS_NONE); genPOPCNTIntrinsic(node); break; + } + case InstructionSet_X86Serialize: case InstructionSet_X86Serialize_X64: + { + assert(instOptions == INS_OPTS_NONE); genX86SerializeIntrinsic(node); break; + } default: unreached(); @@ -787,7 +822,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) void CodeGen::genHWIntrinsic_R_RM( GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, regNumber reg, GenTree* rmOp, insOpts instOptions) { - emitter* emit = GetEmitter(); + emitter* emit = GetEmitter(); + + if (CodeGenInterface::IsEmbeddedBroadcastEnabled(ins, rmOp)) + { + instOptions = AddEmbBroadcastMode(instOptions); + } + OperandDesc rmOpDesc = genOperandDesc(rmOp); if (((instOptions & INS_OPTS_EVEX_b_MASK) != 0) && (rmOpDesc.GetKind() == OperandKind::Reg)) @@ -809,11 +850,11 @@ void CodeGen::genHWIntrinsic_R_RM( switch (rmOpDesc.GetKind()) { case OperandKind::ClsVar: - emit->emitIns_R_C(ins, attr, reg, rmOpDesc.GetFieldHnd(), 0); + emit->emitIns_R_C(ins, attr, reg, rmOpDesc.GetFieldHnd(), 0, instOptions); break; case OperandKind::Local: - emit->emitIns_R_S(ins, attr, reg, rmOpDesc.GetVarNum(), rmOpDesc.GetLclOffset()); + emit->emitIns_R_S(ins, attr, reg, rmOpDesc.GetVarNum(), rmOpDesc.GetLclOffset(), instOptions); break; case OperandKind::Indir: @@ -822,7 +863,7 @@ void CodeGen::genHWIntrinsic_R_RM( // temporary GT_IND to generate code with. GenTreeIndir indirForm; GenTreeIndir* indir = rmOpDesc.GetIndirForm(&indirForm); - emit->emitIns_R_A(ins, attr, reg, indir); + emit->emitIns_R_A(ins, attr, reg, indir, instOptions); } break; @@ -832,6 +873,7 @@ void CodeGen::genHWIntrinsic_R_RM( if (emit->IsMovInstruction(ins)) { + assert(instOptions == INS_OPTS_NONE); emit->emitIns_Mov(ins, attr, reg, rmOpReg, /* canSkip */ false); } else @@ -936,7 +978,7 @@ void CodeGen::genHWIntrinsic_R_RM( } } - emit->emitIns_R_R(ins, attr, reg, rmOpReg); + emit->emitIns_R_R(ins, attr, reg, rmOpReg, instOptions); } break; } @@ -955,7 +997,8 @@ void CodeGen::genHWIntrinsic_R_RM( // ins - The instruction being generated // ival - The immediate value // -void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr simdSize, int8_t ival) +void CodeGen::genHWIntrinsic_R_RM_I( + GenTreeHWIntrinsic* node, instruction ins, emitAttr simdSize, int8_t ival, insOpts instOptions) { regNumber targetReg = node->GetRegNum(); GenTree* op1 = node->Op(1); @@ -971,7 +1014,7 @@ void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, e assert(HWIntrinsicInfo::SupportsContainment(node->GetHWIntrinsicId())); assertIsContainableHWIntrinsicOp(compiler->m_pLowering, node, op1); } - inst_RV_TT_IV(ins, simdSize, targetReg, op1, ival); + inst_RV_TT_IV(ins, simdSize, targetReg, op1, ival, instOptions); } //------------------------------------------------------------------------ @@ -1013,7 +1056,8 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, e // ins - The instruction being generated // ival - The immediate value // -void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr simdSize, int8_t ival) +void CodeGen::genHWIntrinsic_R_R_RM_I( + GenTreeHWIntrinsic* node, instruction ins, emitAttr simdSize, int8_t ival, insOpts instOptions) { regNumber targetReg = node->GetRegNum(); GenTree* op1 = node->Op(1); @@ -1042,7 +1086,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, // insertps can also contain op2 when it is zero in which case // we just reuse op1Reg since ival specifies the entry to zero - GetEmitter()->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op1Reg, ival); + GetEmitter()->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op1Reg, ival, instOptions); return; } } @@ -1050,7 +1094,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, assert(op1Reg != REG_NA); bool isRMW = node->isRMWHWIntrinsic(compiler); - inst_RV_RV_TT_IV(ins, simdSize, targetReg, op1Reg, op2, ival, isRMW); + inst_RV_RV_TT_IV(ins, simdSize, targetReg, op1Reg, op2, ival, isRMW, instOptions); } //------------------------------------------------------------------------ @@ -1061,7 +1105,7 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, // node - The hardware intrinsic node // ins - The instruction being generated // -void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, emitAttr simdSize) +void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, emitAttr simdSize, insOpts instOptions) { regNumber targetReg = node->GetRegNum(); GenTree* op1 = node->Op(1); @@ -1076,6 +1120,11 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, assert(op1Reg != REG_NA); assert(op3Reg != REG_NA); + if (CodeGenInterface::IsEmbeddedBroadcastEnabled(ins, op2)) + { + instOptions = AddEmbBroadcastMode(instOptions); + } + OperandDesc op2Desc = genOperandDesc(op2); if (op2Desc.IsContained()) @@ -1087,12 +1136,12 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, switch (op2Desc.GetKind()) { case OperandKind::ClsVar: - emit->emitIns_SIMD_R_R_C_R(ins, simdSize, targetReg, op1Reg, op3Reg, op2Desc.GetFieldHnd(), 0); + emit->emitIns_SIMD_R_R_C_R(ins, simdSize, targetReg, op1Reg, op3Reg, op2Desc.GetFieldHnd(), 0, instOptions); break; case OperandKind::Local: emit->emitIns_SIMD_R_R_S_R(ins, simdSize, targetReg, op1Reg, op3Reg, op2Desc.GetVarNum(), - op2Desc.GetLclOffset()); + op2Desc.GetLclOffset(), instOptions); break; case OperandKind::Indir: @@ -1101,12 +1150,12 @@ void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins, // temporary GT_IND to generate code with. GenTreeIndir indirForm; GenTreeIndir* indir = op2Desc.GetIndirForm(&indirForm); - emit->emitIns_SIMD_R_R_A_R(ins, simdSize, targetReg, op1Reg, op3Reg, indir); + emit->emitIns_SIMD_R_R_A_R(ins, simdSize, targetReg, op1Reg, op3Reg, indir, instOptions); } break; case OperandKind::Reg: - emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Desc.GetReg(), op3Reg); + emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Desc.GetReg(), op3Reg, instOptions); break; default: @@ -1139,7 +1188,13 @@ void CodeGen::genHWIntrinsic_R_R_R_RM(instruction ins, assert(op1Reg != REG_NA); assert(op2Reg != REG_NA); - emitter* emit = GetEmitter(); + emitter* emit = GetEmitter(); + + if (CodeGenInterface::IsEmbeddedBroadcastEnabled(ins, op3)) + { + instOptions = AddEmbBroadcastMode(instOptions); + } + OperandDesc op3Desc = genOperandDesc(op3); if (((instOptions & INS_OPTS_EVEX_b_MASK) != 0) && (op3Desc.GetKind() == OperandKind::Reg)) @@ -1155,12 +1210,12 @@ void CodeGen::genHWIntrinsic_R_R_R_RM(instruction ins, switch (op3Desc.GetKind()) { case OperandKind::ClsVar: - emit->emitIns_SIMD_R_R_R_C(ins, attr, targetReg, op1Reg, op2Reg, op3Desc.GetFieldHnd(), 0); + emit->emitIns_SIMD_R_R_R_C(ins, attr, targetReg, op1Reg, op2Reg, op3Desc.GetFieldHnd(), 0, instOptions); break; case OperandKind::Local: emit->emitIns_SIMD_R_R_R_S(ins, attr, targetReg, op1Reg, op2Reg, op3Desc.GetVarNum(), - op3Desc.GetLclOffset()); + op3Desc.GetLclOffset(), instOptions); break; case OperandKind::Indir: @@ -1169,12 +1224,12 @@ void CodeGen::genHWIntrinsic_R_R_R_RM(instruction ins, // temporary GT_IND to generate code with. GenTreeIndir indirForm; GenTreeIndir* indir = op3Desc.GetIndirForm(&indirForm); - emit->emitIns_SIMD_R_R_R_A(ins, attr, targetReg, op1Reg, op2Reg, indir); + emit->emitIns_SIMD_R_R_R_A(ins, attr, targetReg, op1Reg, op2Reg, indir, instOptions); } break; case OperandKind::Reg: - emit->emitIns_SIMD_R_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, op3Desc.GetReg()); + emit->emitIns_SIMD_R_R_R_R(ins, attr, targetReg, op1Reg, op2Reg, op3Desc.GetReg(), instOptions); break; default: @@ -1191,7 +1246,8 @@ void CodeGen::genHWIntrinsic_R_R_R_RM(instruction ins, // ins - The instruction being generated // ival - The immediate value // -void CodeGen::genHWIntrinsic_R_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival) +void CodeGen::genHWIntrinsic_R_R_R_RM_I( + GenTreeHWIntrinsic* node, instruction ins, emitAttr attr, int8_t ival, insOpts instOptions) { regNumber targetReg = node->GetRegNum(); GenTree* op1 = node->Op(1); @@ -1236,21 +1292,28 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I(GenTreeHWIntrinsic* node, instruction in assert(op1Reg != REG_NA); assert(op2Reg != REG_NA); - emitter* emit = GetEmitter(); + emitter* emit = GetEmitter(); + + if (CodeGenInterface::IsEmbeddedBroadcastEnabled(ins, op3)) + { + instOptions = AddEmbBroadcastMode(instOptions); + } + OperandDesc op3Desc = genOperandDesc(op3); switch (op3Desc.GetKind()) { case OperandKind::ClsVar: { - emit->emitIns_SIMD_R_R_R_C_I(ins, attr, targetReg, op1Reg, op2Reg, op3Desc.GetFieldHnd(), 0, ival); + emit->emitIns_SIMD_R_R_R_C_I(ins, attr, targetReg, op1Reg, op2Reg, op3Desc.GetFieldHnd(), 0, ival, + instOptions); break; } case OperandKind::Local: { emit->emitIns_SIMD_R_R_R_S_I(ins, attr, targetReg, op1Reg, op2Reg, op3Desc.GetVarNum(), - op3Desc.GetLclOffset(), ival); + op3Desc.GetLclOffset(), ival, instOptions); break; } @@ -1260,13 +1323,13 @@ void CodeGen::genHWIntrinsic_R_R_R_RM_I(GenTreeHWIntrinsic* node, instruction in // temporary GT_IND to generate code with. GenTreeIndir indirForm; GenTreeIndir* indir = op3Desc.GetIndirForm(&indirForm); - emit->emitIns_SIMD_R_R_R_A_I(ins, attr, targetReg, op1Reg, op2Reg, indir, ival); + emit->emitIns_SIMD_R_R_R_A_I(ins, attr, targetReg, op1Reg, op2Reg, indir, ival, instOptions); } break; case OperandKind::Reg: { - emit->emitIns_SIMD_R_R_R_R_I(ins, attr, targetReg, op1Reg, op2Reg, op3Desc.GetReg(), ival); + emit->emitIns_SIMD_R_R_R_R_I(ins, attr, targetReg, op1Reg, op2Reg, op3Desc.GetReg(), ival, instOptions); break; } @@ -1461,7 +1524,7 @@ void CodeGen::genNonTableDrivenHWIntrinsicsJumpTableFallback(GenTreeHWIntrinsic* // Note: // We currently assume that all base intrinsics have zero or one operand. // -void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) +void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); regNumber targetReg = node->GetRegNum(); @@ -1489,7 +1552,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) { if (varTypeIsIntegral(baseType)) { - genHWIntrinsic_R_RM(node, ins, emitActualTypeSize(baseType), targetReg, op1); + genHWIntrinsic_R_RM(node, ins, emitActualTypeSize(baseType), targetReg, op1, instOptions); } else { @@ -1499,10 +1562,11 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) if (op1->isContained() || op1->isUsedFromSpillTemp()) { - genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1); + genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions); } else { + assert(instOptions == INS_OPTS_NONE); // Just use movaps for reg->reg moves as it has zero-latency on modern CPUs emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); } @@ -1514,6 +1578,8 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) case NI_Vector256_GetElement: case NI_Vector512_GetElement: { + assert(instOptions == INS_OPTS_NONE); + if (simdType == TYP_SIMD12) { // op1 of TYP_SIMD12 should be considered as TYP_SIMD16 @@ -1603,25 +1669,25 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) else { emit->emitIns_SIMD_R_R_R_I(INS_shufps, attr, targetReg, op1Reg, op1Reg, - static_cast(0x55)); + static_cast(0x55), instOptions); } } else if (ival == 2) { - emit->emitIns_SIMD_R_R_R(INS_unpckhps, attr, targetReg, op1Reg, op1Reg); + emit->emitIns_SIMD_R_R_R(INS_unpckhps, attr, targetReg, op1Reg, op1Reg, instOptions); } else { assert(ival == 3); emit->emitIns_SIMD_R_R_R_I(INS_shufps, attr, targetReg, op1Reg, op1Reg, - static_cast(0xFF)); + static_cast(0xFF), instOptions); } } else { assert(baseType == TYP_DOUBLE); assert(ival == 1); - emit->emitIns_SIMD_R_R_R(INS_unpckhpd, attr, targetReg, op1Reg, op1Reg); + emit->emitIns_SIMD_R_R_R(INS_unpckhpd, attr, targetReg, op1Reg, op1Reg, instOptions); } } else @@ -1679,11 +1745,12 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) ins = ins_Move_Extend(baseType, false); attr = emitTypeSize(baseType); } - genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1); + genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions); } else { assert(varTypeIsFloating(baseType)); + assert(instOptions == INS_OPTS_NONE); // Just use movaps for reg->reg moves as it has zero-latency on modern CPUs emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true); @@ -1710,10 +1777,12 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) if (op1->isContained() || op1->isUsedFromSpillTemp()) { - genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1); + genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions); } else { + assert(instOptions == INS_OPTS_NONE); + // Just use movaps for reg->reg moves as it has zero-latency on modern CPUs emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ false); } @@ -1740,10 +1809,12 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) { attr = emitTypeSize(TYP_SIMD16); } - genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1); + genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1, instOptions); } else { + assert(instOptions == INS_OPTS_NONE); + // We want to always emit the EA_32BYTE version here. // // For ToVector256Unsafe the upper bits don't matter and this allows same @@ -1780,7 +1851,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node) // Arguments: // node - The hardware intrinsic node // -void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node) +void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); @@ -1798,7 +1869,7 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node) var_types targetType = node->TypeGet(); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, targetType); - genHWIntrinsic_R_RM(node, ins, emitTypeSize(targetType), targetReg, op1); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(targetType), targetReg, op1, instOptions); break; } @@ -1813,6 +1884,7 @@ void CodeGen::genX86BaseIntrinsic(GenTreeHWIntrinsic* node) case NI_X86Base_X64_DivRem: { assert(node->GetOperandCount() == 3); + assert(instOptions == INS_OPTS_NONE); // SIMD base type is from signature and can distinguish signed and unsigned var_types targetType = node->GetSimdBaseType(); @@ -1878,7 +1950,7 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) { assert(targetType == TYP_LONG); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - genHWIntrinsic_R_RM(node, ins, EA_8BYTE, targetReg, node->Op(1)); + genHWIntrinsic_R_RM(node, ins, EA_8BYTE, targetReg, node->Op(1), instOptions); break; } @@ -1896,6 +1968,7 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) case NI_SSE_PrefetchNonTemporal: { assert(baseType == TYP_UBYTE); + assert(instOptions == INS_OPTS_NONE); // These do not support containment. assert(!node->Op(1)->isContained()); @@ -1951,7 +2024,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) { assert(baseType == TYP_LONG || baseType == TYP_ULONG); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - genHWIntrinsic_R_RM(node, ins, emitTypeSize(baseType), targetReg, node->Op(1)); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(baseType), targetReg, node->Op(1), instOptions); break; } @@ -1975,7 +2048,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) } instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); - genHWIntrinsic_R_RM(node, ins, attr, targetReg, node->Op(1)); + genHWIntrinsic_R_RM(node, ins, attr, targetReg, node->Op(1), instOptions); break; } @@ -2017,7 +2090,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) // Arguments: // node - The hardware intrinsic node // -void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) +void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); GenTree* op1 = node->Op(1); @@ -2045,7 +2118,7 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) } else { - genHWIntrinsic_R_RM(node, ins, EA_16BYTE, targetReg, op1); + genHWIntrinsic_R_RM(node, ins, EA_16BYTE, targetReg, op1, instOptions); } break; } @@ -2060,7 +2133,7 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) emitAttr attr = emitActualTypeSize(node->TypeGet()); auto emitSwCase = [&](int8_t i) { - inst_RV_TT_IV(ins, attr, targetReg, op1, i); + inst_RV_TT_IV(ins, attr, targetReg, op1, i, instOptions); }; if (op2->IsCnsIntOrI()) @@ -2095,7 +2168,7 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node) // Arguments: // node - The hardware intrinsic node // -void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node) +void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); regNumber targetReg = node->GetRegNum(); @@ -2117,18 +2190,20 @@ void CodeGen::genSSE42Intrinsic(GenTreeHWIntrinsic* node) case NI_SSE42_Crc32: case NI_SSE42_X64_Crc32: { + assert(instOptions == INS_OPTS_NONE); + assert((op2->GetRegNum() != targetReg) || (op1Reg == targetReg)); emit->emitIns_Mov(INS_mov, emitTypeSize(targetType), targetReg, op1Reg, /* canSkip */ true); if ((baseType == TYP_UBYTE) || (baseType == TYP_USHORT)) // baseType is the type of the second argument { assert(targetType == TYP_INT); - genHWIntrinsic_R_RM(node, INS_crc32, emitTypeSize(baseType), targetReg, op2); + genHWIntrinsic_R_RM(node, INS_crc32, emitTypeSize(baseType), targetReg, op2, instOptions); } else { assert((targetType == TYP_INT) || (targetType == TYP_LONG)); - genHWIntrinsic_R_RM(node, INS_crc32, emitTypeSize(targetType), targetReg, op2); + genHWIntrinsic_R_RM(node, INS_crc32, emitTypeSize(targetType), targetReg, op2, instOptions); } break; @@ -2163,7 +2238,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption if (HWIntrinsicInfo::IsPermuteVar2x(intrinsicId)) { - genPermuteVar2x(node); + genPermuteVar2x(node, instOptions); return; } @@ -2184,6 +2259,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX2_ConvertToInt32: case NI_AVX2_ConvertToUInt32: { + assert(instOptions == INS_OPTS_NONE); + op1Reg = op1->GetRegNum(); assert((baseType == TYP_INT) || (baseType == TYP_UINT)); instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType); @@ -2206,7 +2283,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption } else { - genHWIntrinsic_R_RM(node, ins, EA_32BYTE, targetReg, op1); + genHWIntrinsic_R_RM(node, ins, EA_32BYTE, targetReg, op1, instOptions); } break; } @@ -2216,6 +2293,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX2_GatherMaskVector128: case NI_AVX2_GatherMaskVector256: { + assert(instOptions == INS_OPTS_NONE); + GenTree* op2 = node->Op(2); GenTree* op3 = node->Op(3); GenTree* lastOp = nullptr; @@ -2258,7 +2337,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption // generate all-one mask vector assert(!emitter::isHighSimdReg(targetReg)); - emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, maskReg, maskReg, maskReg); + emit->emitIns_SIMD_R_R_R(INS_pcmpeqd, attr, maskReg, maskReg, maskReg, instOptions); } bool isVector128GatherWithVector256Index = (targetType == TYP_SIMD16) && (indexOp->TypeGet() == TYP_SIMD32); @@ -2309,6 +2388,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX512F_AddMask: { + assert(instOptions == INS_OPTS_NONE); + uint32_t simdSize = node->GetSimdSize(); uint32_t count = simdSize / genTypeSize(baseType); @@ -2347,6 +2428,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX512F_AndMask: { + assert(instOptions == INS_OPTS_NONE); + uint32_t simdSize = node->GetSimdSize(); uint32_t count = simdSize / genTypeSize(baseType); @@ -2385,6 +2468,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX512F_AndNotMask: { + assert(instOptions == INS_OPTS_NONE); + uint32_t simdSize = node->GetSimdSize(); uint32_t count = simdSize / genTypeSize(baseType); @@ -2423,6 +2508,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX512F_MoveMask: { + assert(instOptions == INS_OPTS_NONE); + uint32_t simdSize = node->GetSimdSize(); uint32_t count = simdSize / genTypeSize(baseType); @@ -2458,6 +2545,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX512F_KORTEST: { + assert(instOptions == INS_OPTS_NONE); + uint32_t simdSize = node->GetSimdSize(); uint32_t count = simdSize / genTypeSize(baseType); @@ -2498,6 +2587,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX512F_KTEST: { + assert(instOptions == INS_OPTS_NONE); + uint32_t simdSize = node->GetSimdSize(); uint32_t count = simdSize / genTypeSize(baseType); @@ -2534,6 +2625,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX512F_NotMask: { + assert(instOptions == INS_OPTS_NONE); + uint32_t simdSize = node->GetSimdSize(); uint32_t count = simdSize / genTypeSize(baseType); @@ -2567,6 +2660,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX512F_OrMask: { + assert(instOptions == INS_OPTS_NONE); + uint32_t simdSize = node->GetSimdSize(); uint32_t count = simdSize / genTypeSize(baseType); @@ -2605,6 +2700,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX512F_ShiftLeftMask: { + assert(instOptions == INS_OPTS_NONE); + uint32_t simdSize = node->GetSimdSize(); uint32_t count = simdSize / genTypeSize(baseType); @@ -2644,6 +2741,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX512F_ShiftRightMask: { + assert(instOptions == INS_OPTS_NONE); + uint32_t simdSize = node->GetSimdSize(); uint32_t count = simdSize / genTypeSize(baseType); @@ -2683,6 +2782,8 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption case NI_AVX512F_XorMask: { + assert(instOptions == INS_OPTS_NONE); + uint32_t simdSize = node->GetSimdSize(); uint32_t count = simdSize / genTypeSize(baseType); @@ -2787,7 +2888,7 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption // is passed in as the RM register and op1 is passed as the R register op1Reg = op1->GetRegNum(); - emit->emitIns_R_R(ins, attr, op1Reg, targetReg); + emit->emitIns_R_R(ins, attr, op1Reg, targetReg, instOptions); break; } @@ -2808,17 +2909,6 @@ void CodeGen::genAvxFamilyIntrinsic(GenTreeHWIntrinsic* node, insOpts instOption genProduceReg(node); } -//------------------------------------------------------------------------ -// genAESIntrinsic: Generates the code for an AES hardware intrinsic node -// -// Arguments: -// node - The hardware intrinsic node -// -void CodeGen::genAESIntrinsic(GenTreeHWIntrinsic* node) -{ - NYI("Implement AES intrinsic code generation"); -} - //------------------------------------------------------------------------ // genBMI1OrBMI2Intrinsic: Generates the code for a BMI1 and BMI2 hardware intrinsic node // @@ -2864,7 +2954,7 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptio case NI_BMI1_X64_ResetLowestSetBit: { assert((targetType == TYP_INT) || (targetType == TYP_LONG)); - genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet()), targetReg, node->Op(1)); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet()), targetReg, node->Op(1), instOptions); break; } @@ -2879,6 +2969,9 @@ void CodeGen::genBMI1OrBMI2Intrinsic(GenTreeHWIntrinsic* node, insOpts instOptio case NI_BMI2_MultiplyNoFlags: case NI_BMI2_X64_MultiplyNoFlags: { + + assert(instOptions == INS_OPTS_NONE); + size_t numArgs = node->GetOperandCount(); assert(numArgs == 2 || numArgs == 3); @@ -3061,7 +3154,7 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions) // Arguments: // node - The hardware intrinsic node // -void CodeGen::genPermuteVar2x(GenTreeHWIntrinsic* node) +void CodeGen::genPermuteVar2x(GenTreeHWIntrinsic* node, insOpts instOptions) { NamedIntrinsic intrinsicId = node->GetHWIntrinsicId(); assert(HWIntrinsicInfo::IsPermuteVar2x(intrinsicId)); @@ -3144,7 +3237,7 @@ void CodeGen::genPermuteVar2x(GenTreeHWIntrinsic* node) } assert(ins != INS_invalid); - genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, emitOp1->GetRegNum(), emitOp2->GetRegNum(), emitOp3); + genHWIntrinsic_R_R_R_RM(ins, attr, targetReg, emitOp1->GetRegNum(), emitOp2->GetRegNum(), emitOp3, instOptions); genProduceReg(node); } @@ -3164,17 +3257,6 @@ void CodeGen::genLZCNTIntrinsic(GenTreeHWIntrinsic* node) genProduceReg(node); } -//------------------------------------------------------------------------ -// genPCLMULQDQIntrinsic: Generates the code for a PCLMULQDQ hardware intrinsic node -// -// Arguments: -// node - The hardware intrinsic node -// -void CodeGen::genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node) -{ - NYI("Implement PCLMULQDQ intrinsic code generation"); -} - //------------------------------------------------------------------------ // genPOPCNTIntrinsic: Generates the code for a POPCNT hardware intrinsic node // @@ -3233,7 +3315,7 @@ void CodeGen::genXCNTIntrinsic(GenTreeHWIntrinsic* node, instruction ins) { GetEmitter()->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg); } - genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet()), targetReg, op1); + genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet()), targetReg, op1, INS_OPTS_NONE); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h index 06feb2816de5ce..057b2cf0c8023c 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64.h +++ b/src/coreclr/jit/hwintrinsiclistarm64.h @@ -15,107 +15,107 @@ // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector64 Intrinsics -HARDWARE_INTRINSIC(Vector64, Abs, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Add, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, AndNot, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, As, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, AsByte, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, AsDouble, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, AsInt16, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, AsInt32, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, AsInt64, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, AsNInt, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, AsNUInt, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, AsSByte, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, AsSingle, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, AsUInt16, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, AsUInt32, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, AsUInt64, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, BitwiseAnd, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, BitwiseOr, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Ceiling, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, ConditionalSelect, 8, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, ConvertToDouble, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToInt32, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToInt32Native, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToInt64, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToInt64Native, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToSingle, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToUInt32, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToUInt32Native, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToUInt64, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, ConvertToUInt64Native, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, Abs, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Add, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, AndNot, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, As, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, AsByte, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, AsDouble, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, AsInt16, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, AsInt32, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, AsInt64, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, AsNInt, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, AsNUInt, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, AsSByte, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, AsSingle, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, AsUInt16, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, AsUInt32, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, AsUInt64, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, BitwiseAnd, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, BitwiseOr, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Ceiling, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, ConditionalSelect, 8, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, ConvertToDouble, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, ConvertToInt32, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, ConvertToInt32Native, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, ConvertToInt64, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, ConvertToInt64Native, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, ConvertToSingle, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, ConvertToUInt32, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, ConvertToUInt32Native, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, ConvertToUInt64, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, ConvertToUInt64Native, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, Create, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, CreateScalar, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, true, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(Vector64, CreateSequence, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Divide, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Dot, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Equals, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, EqualsAll, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, EqualsAny, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, ExtractMostSignificantBits, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Floor, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, true, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, get_Indices, 8, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, get_One, 8, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, get_Zero, 8, 0, true, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Vector64, CreateSequence, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Divide, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Dot, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, Equals, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, EqualsAll, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, EqualsAny, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, ExtractMostSignificantBits, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, Floor, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, get_AllBitsSet, 8, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, get_Indices, 8, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, get_One, 8, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, get_Zero, 8, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, GetElement, 8, 2, true, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, GreaterThan, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, GreaterThanAll, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, GreaterThanAny, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, GreaterThanOrEqual, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, GreaterThanOrEqualAll, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, GreaterThanOrEqualAny, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, LessThan, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, LessThanAll, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, LessThanAny, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, LessThanOrEqual, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, LessThanOrEqualAll, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, LessThanOrEqualAny, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Load, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, LoadAligned, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, LoadAlignedNonTemporal, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, LoadUnsafe, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Max, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Min, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Multiply, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Narrow, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Negate, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, OnesComplement, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, op_Addition, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, op_BitwiseAnd, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector64, op_BitwiseOr, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector64, op_Division, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector64, GreaterThan, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, GreaterThanAll, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, GreaterThanAny, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, GreaterThanOrEqual, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, GreaterThanOrEqualAll, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, GreaterThanOrEqualAny, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, LessThan, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, LessThanAll, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, LessThanAny, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, LessThanOrEqual, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, LessThanOrEqualAll, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, LessThanOrEqualAny, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, Load, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, LoadAligned, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, LoadAlignedNonTemporal, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, LoadUnsafe, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Max, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Min, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Multiply, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Narrow, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Negate, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, OnesComplement, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, op_Addition, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, op_BitwiseAnd, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector64, op_BitwiseOr, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector64, op_Division, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, op_Equality, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector64, op_ExclusiveOr, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector64, op_ExclusiveOr, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector64, op_Inequality, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector64, op_LeftShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, op_Multiply, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, op_OnesComplement, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, op_RightShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, op_Subtraction, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, op_UnaryNegation, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, op_UnaryPlus, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, op_UnsignedRightShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, ShiftLeft, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, ShiftRightArithmetic, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, ShiftRightLogical, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Sqrt, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Store, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, StoreAlignedNonTemporal, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, StoreUnsafe, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Subtract, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector64, Sum, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector64, op_LeftShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, op_Multiply, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, op_OnesComplement, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, op_RightShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, op_Subtraction, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, op_UnaryNegation, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, op_UnaryPlus, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, op_UnsignedRightShift, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, ShiftLeft, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, ShiftRightArithmetic, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, ShiftRightLogical, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Shuffle, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Sqrt, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Store, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, StoreAligned, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, StoreAlignedNonTemporal, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, StoreUnsafe, 8, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, Subtract, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector64, Sum, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, ToScalar, 8, 1, true, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, ToVector128, 8, 1, true, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector64, ToVector128Unsafe, 8, 1, true, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SIMD, HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector64, WidenLower, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector64, WidenUpper, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, WidenLower, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector64, WidenUpper, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector64, WithElement, 8, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector64, Xor, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector64, Xor, 8, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -123,114 +123,114 @@ HARDWARE_INTRINSIC(Vector64, Xor, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector128 Intrinsics -HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Add, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, As, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsByte, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsInt16, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsNInt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsNUInt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsSByte, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsSingle, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsVector, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Add, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, As, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsByte, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsInt16, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsNInt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsNUInt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsSByte, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsSingle, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt32Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt64Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToSingle, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt32Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt64Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToInt32Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToInt64Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToSingle, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToUInt32Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToUInt64Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, Create, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, true, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment) -HARDWARE_INTRINSIC(Vector128, CreateSequence, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Divide, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Equals, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, EqualsAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, EqualsAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Floor, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, true, {INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni, INS_mvni}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, get_Indices, 16, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, get_One, 16, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, true, {INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi, INS_movi}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Vector128, CreateSequence, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Divide, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Equals, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, EqualsAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, EqualsAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Floor, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_Indices, 16, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_One, 16, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, true, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, GetLower, 16, 1, true, {INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov, INS_mov}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, GetUpper, 16, 1, true, {INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext, INS_ext}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector128, GreaterThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, GreaterThanAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, GreaterThanAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LessThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LessThanAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LessThanAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Load, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LoadAligned, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LoadAlignedNonTemporal, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LoadUnsafe, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Max, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Min, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Negate, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, GreaterThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, GreaterThanAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, GreaterThanAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LessThanAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThanAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Load, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LoadAligned, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LoadAlignedNonTemporal, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LoadUnsafe, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Max, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Min, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Negate, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_Commutative) +HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_UnaryPlus, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ShiftRightArithmetic, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ShiftRightLogical, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Store, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, StoreUnsafe, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Subtract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_UnaryPlus, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ShiftRightArithmetic, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ShiftRightLogical, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Store, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, StoreUnsafe, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Subtract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, true, {INS_smov, INS_umov, INS_smov, INS_umov, INS_smov, INS_umov, INS_umov, INS_umov, INS_dup, INS_dup}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector128, WidenLower, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, WidenUpper, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, WidenLower, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, WidenUpper, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Vector128, WithLower, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, WithUpper, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Xor, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, Xor, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -336,8 +336,8 @@ HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector128, HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64x2, 8, 1, true, {INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_ld2r, INS_invalid, INS_invalid, INS_ld2r, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64x3, 8, 1, true, {INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_ld3r, INS_invalid, INS_invalid, INS_ld3r, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd, LoadAndReplicateToVector64x4, 8, 1, true, {INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_ld4r, INS_invalid, INS_invalid, INS_ld4r, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) -HARDWARE_INTRINSIC(AdvSimd, LoadVector64, 8, 1, true, {INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AdvSimd, LoadVector128, 16, 1, true, {INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1, INS_ld1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(AdvSimd, LoadVector64, 8, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AdvSimd, LoadVector128, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(AdvSimd, LoadVector64x2AndUnzip, 8, 1, true, {INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_ld2, INS_invalid, INS_invalid, INS_ld2, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd, LoadVector64x3AndUnzip, 8, 1, true, {INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_ld3, INS_invalid, INS_invalid, INS_ld3, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd, LoadVector64x4AndUnzip, 8, 1, true, {INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_ld4, INS_invalid, INS_invalid, INS_ld4, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialImport|HW_Flag_MultiReg|HW_Flag_NeedsConsecutiveRegisters) @@ -489,7 +489,7 @@ HARDWARE_INTRINSIC(AdvSimd, ShiftRightLogicalScalar, HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningLower, 8, 1, true, {INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_sxtl, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AdvSimd, SignExtendWideningUpper, 16, 1, true, {INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_sxtl2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AdvSimd, SqrtScalar, 8, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_fsqrt, INS_fsqrt}, HW_Category_SIMD, HW_Flag_SIMDScalar) -HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(AdvSimd, Store, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalar, 8, 3, true, {INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1, INS_st1}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalarVector64x2, 8, 3, true, {INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_st2, INS_invalid, INS_invalid, INS_st2, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) HARDWARE_INTRINSIC(AdvSimd, StoreSelectedScalarVector64x3, 8, 3, true, {INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_st3, INS_invalid, INS_invalid, INS_st3, INS_invalid}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NeedsConsecutiveRegisters) diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h index d3f7c575a86667..a726cd624dcfd5 100644 --- a/src/coreclr/jit/hwintrinsiclistxarch.h +++ b/src/coreclr/jit/hwintrinsiclistxarch.h @@ -29,337 +29,337 @@ // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector128 Intrinsics -HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Add, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, As, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsByte, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsInt16, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsNInt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsNUInt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsSByte, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsSingle, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsVector, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, Abs, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Add, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, AndNot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, As, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsByte, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsInt16, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsNInt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsNUInt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsSByte, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsSingle, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsUInt16, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsUInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsUInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, AsVector2, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsd_simd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt32Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToInt64Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToSingle, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt32Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, ConvertToUInt64Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToInt32Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToInt64Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToSingle, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToUInt32Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ConvertToUInt64Native, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, Create, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, true, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Vector128, CreateSequence, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Divide, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Equals, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector128, EqualsAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, EqualsAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Floor, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, true, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmpps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(Vector128, get_Indices, 16, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, get_One, 16, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, false, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Vector128, CreateSequence, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Divide, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Equals, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, EqualsAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, EqualsAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, ExtractMostSignificantBits, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Floor, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_Indices, 16, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_One, 16, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, get_Zero, 16, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, GetElement, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, GreaterThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector128, GreaterThanAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, GreaterThanAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LessThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector128, LessThanAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LessThanAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Load, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LoadAligned, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LoadAlignedNonTemporal, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, LoadUnsafe, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Max, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Min, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Negate, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, GreaterThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, GreaterThanAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, GreaterThanAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, GreaterThanOrEqualAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LessThanAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThanAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAll, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, LessThanOrEqualAny, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Load, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LoadAligned, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LoadAlignedNonTemporal, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, LoadUnsafe, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Max, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Min, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Narrow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Negate, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_Addition, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_BitwiseAnd, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_BitwiseOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_Division, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Equality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_UnaryPlus, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ShiftRightArithmetic, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, ShiftRightLogical, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Store, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, StoreUnsafe, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Subtract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_UnaryPlus, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, op_UnsignedRightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ShiftLeft, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ShiftRightArithmetic, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, ShiftRightLogical, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Store, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, StoreAligned, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, StoreAlignedNonTemporal, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, StoreUnsafe, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, Subtract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector128, Sum, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, ToScalar, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, ToVector256, 16, 1, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, ToVector256Unsafe, 16, 1, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(Vector128, ToVector512, 16, 1, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(Vector128, WidenLower, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, WidenUpper, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, WidenLower, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector128, WidenUpper, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector128, WithElement, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector128, Xor, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector128, Xor, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector256 Intrinsics -HARDWARE_INTRINSIC(Vector256, Abs, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Add, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, AndNot, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, As, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsByte, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsDouble, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsInt16, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsInt32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsInt64, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsNInt, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsNUInt, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsSByte, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsSingle, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsUInt16, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsUInt32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsUInt64, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsVector, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, AsVector256, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, BitwiseAnd, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, BitwiseOr, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, Ceiling, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, Abs, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Add, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, AndNot, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, As, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsByte, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsDouble, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsInt16, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsInt32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsInt64, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsNInt, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsNUInt, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsSByte, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsSingle, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsUInt16, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsUInt32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsUInt64, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsVector, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, AsVector256, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, BitwiseAnd, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, BitwiseOr, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, Ceiling, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, ConditionalSelect, 32, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConvertToDouble, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToInt32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConvertToInt32Native, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConvertToInt64, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToInt64Native, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToSingle, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, ConvertToUInt32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToUInt32Native, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToUInt64, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, ConvertToUInt64Native, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToDouble, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToInt32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ConvertToInt32Native, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ConvertToInt64, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToInt64Native, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToSingle, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, ConvertToUInt32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToUInt32Native, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToUInt64, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ConvertToUInt64Native, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, Create, 32, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, CreateScalar, 32, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, true, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, CreateSequence, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, Divide, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Dot, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Equals, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector256, EqualsAll, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, EqualsAny, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, ExtractMostSignificantBits, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Floor, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, 32, 0, true, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmpps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_AvxOnlyCompatible|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(Vector256, get_Indices, 32, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, get_One, 32, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, false, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, CreateSequence, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, Divide, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Dot, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, Equals, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, EqualsAll, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, EqualsAny, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, ExtractMostSignificantBits, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, Floor, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, get_AllBitsSet, 32, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, get_Indices, 32, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_One, 32, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, GetUpper, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, GreaterThan, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector256, GreaterThanAll, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, GreaterThanAny, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqual, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqualAll, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqualAny, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, LessThan, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector256, LessThanAll, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, LessThanAny, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, LessThanOrEqual, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector256, LessThanOrEqualAll, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, LessThanOrEqualAny, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Load, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, LoadAligned, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, LoadAlignedNonTemporal, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, LoadUnsafe, 32, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, Max, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Min, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Multiply, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Narrow, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Negate, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_Addition, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, op_BitwiseAnd, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_BitwiseOr, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_Division, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector256, GreaterThan, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, GreaterThanAll, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, GreaterThanAny, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqual, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqualAny, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, GreaterThanOrEqualAll, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, LessThan, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, LessThanAll, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, LessThanAny, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, LessThanOrEqual, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, LessThanOrEqualAny, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, LessThanOrEqualAll, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, Load, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, LoadAligned, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, LoadAlignedNonTemporal, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, LoadUnsafe, 32, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, Max, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Min, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Multiply, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Narrow, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Negate, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, op_Addition, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_BitwiseAnd, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, op_BitwiseOr, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, op_Division, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector256, op_Equality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector256, op_ExclusiveOr, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, op_ExclusiveOr, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector256, op_LeftShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, op_Multiply, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_RightShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, op_Subtraction, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, op_UnaryNegation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, op_UnaryPlus, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, op_UnsignedRightShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, ShiftLeft, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, ShiftRightArithmetic, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, ShiftRightLogical, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Sqrt, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, Store, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, StoreAligned, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, StoreAlignedNonTemporal, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, StoreUnsafe, 32, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, Subtract, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector256, Sum, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector256, op_LeftShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_Multiply, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, op_RightShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_Subtraction, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_UnaryNegation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, op_UnaryPlus, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, op_UnsignedRightShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, ShiftLeft, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, ShiftRightArithmetic, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, ShiftRightLogical, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Shuffle, 32, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Sqrt, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, Store, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, StoreAligned, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, StoreAlignedNonTemporal, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, StoreUnsafe, 32, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, Subtract, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector256, Sum, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, ToScalar, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, ToVector512, 32, 1, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, ToVector512Unsafe, 32, 1, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, WidenLower, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector256, WidenUpper, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, WidenLower, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector256, WidenUpper, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector256, WithElement, 32, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, WithLower, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) HARDWARE_INTRINSIC(Vector256, WithUpper, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) -HARDWARE_INTRINSIC(Vector256, Xor, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible) +HARDWARE_INTRINSIC(Vector256, Xor, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // Vector512 Intrinsics -HARDWARE_INTRINSIC(Vector512, Abs, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Add, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AndNot, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, As, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsByte, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsDouble, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsInt16, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsInt32, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsInt64, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsNInt, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsNUInt, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsSByte, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsSingle, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsUInt16, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsUInt32, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsUInt64, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsVector, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, AsVector512, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, BitwiseAnd, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, Abs, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Add, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, AndNot, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, As, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsByte, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsDouble, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsInt16, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsInt32, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsInt64, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsNInt, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsNUInt, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsSByte, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsSingle, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsUInt16, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsUInt32, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsUInt64, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsVector, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, AsVector512, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, BitwiseAnd, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, ConditionalSelect, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, BitwiseOr, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Ceiling, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, BitwiseOr, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Ceiling, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, Create, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, CreateScalar, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, CreateScalarUnsafe, 64, 1, true, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Vector512, CreateSequence, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, ConvertToDouble, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToInt32, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToInt32Native, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToInt64, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToInt64Native, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToSingle, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToUInt32, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToUInt32Native, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToUInt64, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, ConvertToUInt64Native, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, Divide, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Equals, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, EqualsAll, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, EqualsAny, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, ExtractMostSignificantBits, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Floor, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, get_AllBitsSet, 64, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector512, get_Indices, 64, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, get_One, 64, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Vector512, CreateSequence, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, ConvertToDouble, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToInt32, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToInt32Native, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToInt64, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToInt64Native, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToSingle, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToUInt32, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToUInt32Native, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToUInt64, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ConvertToUInt64Native, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, Divide, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Equals, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, EqualsAll, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, EqualsAny, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, ExtractMostSignificantBits, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, Floor, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_AllBitsSet, 64, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_Indices, 64, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_One, 64, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, get_Zero, 64, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, GetElement, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, GetLower, 64, 1, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Vector512, GetLower128, 64, 1, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, GetUpper, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, GreaterThan, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector512, GreaterThanAll, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, GreaterThanAny, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqualAll, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqualAny, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, LessThan, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector512, LessThanAll, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, LessThanAny, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, LessThanOrEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Vector512, LessThanOrEqualAll, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, LessThanOrEqualAny, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Load, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, LoadAligned, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, LoadAlignedNonTemporal, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, LoadUnsafe, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Max, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Min, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Multiply, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Narrow, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Negate, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, OnesComplement, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, op_Addition, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, op_BitwiseAnd, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector512, op_BitwiseOr, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) -HARDWARE_INTRINSIC(Vector512, op_Division, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, GreaterThan, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, GreaterThanAll, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, GreaterThanAny, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqualAll, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, GreaterThanOrEqualAny, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, LessThan, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, LessThanAll, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, LessThanAny, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, LessThanOrEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, LessThanOrEqualAll, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, LessThanOrEqualAny, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, Load, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, LoadAligned, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, LoadAlignedNonTemporal, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, LoadUnsafe, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Max, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Min, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Multiply, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Narrow, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Negate, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, OnesComplement, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_Addition, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_BitwiseAnd, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_BitwiseOr, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_Division, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_Equality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector512, op_ExclusiveOr, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, op_ExclusiveOr, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(Vector512, op_Inequality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(Vector512, op_LeftShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, op_Multiply, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, op_RightShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, op_Subtraction, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, op_UnaryNegation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, op_UnaryPlus, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, op_UnsignedRightShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, ShiftLeft, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, ShiftRightArithmetic, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, ShiftRightLogical, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Sqrt, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Store, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, StoreUnsafe, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Subtract, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Sum, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, op_LeftShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_Multiply, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_RightShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_Subtraction, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_UnaryNegation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_UnaryPlus, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, op_UnsignedRightShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, ShiftLeft, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, ShiftRightArithmetic, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, ShiftRightLogical, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Shuffle, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Sqrt, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Store, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, StoreAligned, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, StoreAlignedNonTemporal, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, StoreUnsafe, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, Subtract, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(Vector512, Sum, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, ToScalar, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, WidenLower, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(Vector512, WidenUpper, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, WidenLower, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(Vector512, WidenUpper, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, WithElement, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(Vector512, WithLower, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) HARDWARE_INTRINSIC(Vector512, WithUpper, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(Vector512, Xor, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(Vector512, Xor, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -385,10 +385,10 @@ HARDWARE_INTRINSIC(X86Base_X64, DivRem, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SSE Intrinsics -HARDWARE_INTRINSIC(SSE, Add, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE, Add, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE, AddScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, And, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(SSE, AndNot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE, And, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE, AndNot, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE, CompareEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarOrderedEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) @@ -409,7 +409,7 @@ HARDWARE_INTRINSIC(SSE, CompareLessThanOrEqual, HARDWARE_INTRINSIC(SSE, CompareScalarOrderedLessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarLessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedLessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, CompareNotEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(SSE, CompareNotEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarOrderedNotEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarNotEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE, CompareScalarUnorderedNotEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomiss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) @@ -428,24 +428,24 @@ HARDWARE_INTRINSIC(SSE, CompareScalarUnordered, HARDWARE_INTRINSIC(SSE, ConvertToInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, ConvertScalarToVector128Single, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, ConvertToInt32WithTruncation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttss2si32, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, Divide, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE, Divide, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE, DivideScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, LoadAlignedVector128, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, LoadHigh, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, LoadLow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, LoadScalarVector128, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE, LoadVector128, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(SSE, Max, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE, LoadVector128, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(SSE, Max, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE, MaxScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, Min, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE, Min, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE, MinScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE, MoveHighToLow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhlps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment) HARDWARE_INTRINSIC(SSE, MoveLowToHigh, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment) HARDWARE_INTRINSIC(SSE, MoveMask, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE, MoveScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(SSE, Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE, Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE, MultiplyScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, Or, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE, Or, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE, Prefetch0, 0, 1, false, {INS_invalid, INS_prefetcht0, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) HARDWARE_INTRINSIC(SSE, Prefetch1, 0, 1, false, {INS_invalid, INS_prefetcht1, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) HARDWARE_INTRINSIC(SSE, Prefetch2, 0, 1, false, {INS_invalid, INS_prefetcht2, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Other) @@ -454,21 +454,21 @@ HARDWARE_INTRINSIC(SSE, Reciprocal, HARDWARE_INTRINSIC(SSE, ReciprocalScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE, ReciprocalSqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE, ReciprocalSqrtScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE, Shuffle, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE, Shuffle, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE, SqrtScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, Store, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(SSE, Store, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE, StoreAligned, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movaps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE, StoreAlignedNonTemporal, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movntps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE, StoreFence, 0, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) HARDWARE_INTRINSIC(SSE, StoreHigh, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE, StoreLow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlps, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE, StoreScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movss, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE, Subtract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE, Subtract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE, SubtractScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE, UnpackHigh, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE, UnpackLow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE, Xor, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE, UnpackHigh, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE, UnpackLow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE, Xor, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -484,12 +484,12 @@ HARDWARE_INTRINSIC(SSE_X64, ConvertScalarToVector128Single, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SSE2 Intrinsics -HARDWARE_INTRINSIC(SSE2, Add, 16, 2, true, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(SSE2, AddSaturate, 16, 2, true, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(SSE2, Add, 16, 2, true, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, AddSaturate, 16, 2, true, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE2, AddScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, And, 16, 2, true, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_invalid, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(SSE2, AndNot, 16, 2, true, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(SSE2, Average, 16, 2, true, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(SSE2, And, 16, 2, true, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_invalid, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, AndNot, 16, 2, true, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, Average, 16, 2, true, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE2, CompareEqual, 16, 2, true, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) @@ -498,7 +498,7 @@ HARDWARE_INTRINSIC(SSE2, CompareGreaterThan, HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedGreaterThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarGreaterThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedGreaterThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareGreaterThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(SSE2, CompareGreaterThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedGreaterThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarGreaterThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedGreaterThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) @@ -506,38 +506,38 @@ HARDWARE_INTRINSIC(SSE2, CompareLessThan, HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedLessThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarLessThan, 16, 2, true, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedLessThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareLessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(SSE2, CompareLessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedLessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarLessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedLessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedNotEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2, CompareNotEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(SSE2, CompareScalarOrderedNotEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_comisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarNotEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedNotEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_MultiIns|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2, CompareScalarUnorderedNotEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_ucomisd}, HW_Category_SIMDScalar, HW_Flag_Commutative|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarNotGreaterThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareNotGreaterThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarNotGreaterThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareNotLessThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarNotLessThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareNotLessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(SSE2, CompareNotLessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarNotLessThanOrEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareOrdered, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(SSE2, CompareOrdered, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarOrdered, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE2, CompareUnordered, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(SSE2, CompareUnordered, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, CompareScalarUnordered, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, ConvertToInt32, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertToInt32WithTruncation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si32}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertToUInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToVector128Double, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2, ConvertToVector128Double, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Double, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Int32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, ConvertToVector128Single, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2, ConvertToVector128Single, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Single, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag) HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128UInt32, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, Divide, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE2, Divide, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE2, DivideScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, Extract, 16, 2, false, {INS_invalid, INS_invalid, INS_pextrw, INS_pextrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, Insert, 16, 3, false, {INS_invalid, INS_invalid, INS_pinsrw, INS_pinsrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) @@ -546,47 +546,47 @@ HARDWARE_INTRINSIC(SSE2, LoadFence, HARDWARE_INTRINSIC(SSE2, LoadHigh, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, LoadLow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlpd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, LoadScalarVector128, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsd_simd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE2, LoadVector128, 16, 1, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(SSE2, LoadVector128, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(SSE2, MaskMove, 16, 3, false, {INS_maskmovdqu, INS_maskmovdqu, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, Max, 16, 2, true, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE2, Max, 16, 2, true, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE2, MemoryFence, 0, 0, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_SpecialSideEffect_Barrier) HARDWARE_INTRINSIC(SSE2, MaxScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, Min, 16, 2, true, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE2, Min, 16, 2, true, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE2, MinScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE2, MoveMask, 16, 1, true, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE2, MoveScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movq, INS_movq, INS_invalid, INS_movsd_simd}, HW_Category_SIMDScalar, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(SSE2, Multiply, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_invalid, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(SSE2, MultiplyHigh, 16, 2, true, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE2, MultiplyAddAdjacent, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(SSE2, MultiplyLow, 16, 2, false, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(SSE2, Multiply, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_invalid, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, MultiplyHigh, 16, 2, true, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, MultiplyAddAdjacent, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, MultiplyLow, 16, 2, false, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE2, MultiplyScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, Or, 16, 2, true, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_invalid, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(SSE2, PackSignedSaturate, 16, 2, true, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, PackUnsignedSaturate, 16, 2, false, {INS_invalid, INS_packuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, SumAbsoluteDifferences, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, ShiftLeftLogical, 16, 2, true, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(SSE2, Or, 16, 2, true, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_invalid, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, PackSignedSaturate, 16, 2, true, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, PackUnsignedSaturate, 16, 2, false, {INS_invalid, INS_packuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, SumAbsoluteDifferences, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE2, ShiftLeftLogical, 16, 2, true, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE2, ShiftLeftLogical128BitLane, 16, 2, false, {INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, ShiftRightArithmetic, 16, 2, true, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, ShiftRightLogical, 16, 2, true, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(SSE2, ShiftRightArithmetic, 16, 2, true, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, ShiftRightLogical, 16, 2, true, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE2, ShiftRightLogical128BitLane, 16, 2, false, {INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, ShuffleHigh, 16, 2, false, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, ShuffleLow, 16, 2, false, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(SSE2, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE2, Shuffle, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, ShuffleHigh, 16, 2, false, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, ShuffleLow, 16, 2, false, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, Sqrt, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE2, SqrtScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, Store, 16, 2, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(SSE2, Store, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE2, StoreAligned, 16, 2, true, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_invalid, INS_movapd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE2, StoreAlignedNonTemporal, 16, 2, true, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_invalid, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE2, StoreHigh, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movhpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE2, StoreLow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movlpd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE2, StoreNonTemporal, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti, INS_movnti, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(SSE2, StoreScalar, 16, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsd_simd}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(SSE2, Subtract, 16, 2, true, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(SSE2, SubtractSaturate, 16, 2, true, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSE2, Subtract, 16, 2, true, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, SubtractSaturate, 16, 2, true, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE2, SubtractScalar, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE2, UnpackHigh, 16, 2, true, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, UnpackLow, 16, 2, true, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE2, Xor, 16, 2, true, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE2, UnpackHigh, 16, 2, true, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, UnpackLow, 16, 2, true, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE2, Xor, 16, 2, true, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -620,15 +620,15 @@ HARDWARE_INTRINSIC(SSE3, MoveLowAndDuplicate, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SSSE3 Intrinsics -HARDWARE_INTRINSIC(SSSE3, Abs, 16, 1, true, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(SSSE3, AlignRight, 16, 3, false, {INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(SSSE3, Abs, 16, 1, true, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSSE3, AlignRight, 16, 3, false, {INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSSE3, HorizontalAdd, 16, 2, true, {INS_invalid, INS_invalid, INS_phaddw, INS_phaddw, INS_phaddd, INS_phaddd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSSE3, HorizontalAddSaturate, 16, 2, false, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSSE3, HorizontalSubtract, 16, 2, true, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSSE3, HorizontalSubtractSaturate, 16, 2, false, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSSE3, MultiplyAddAdjacent, 16, 2, false, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSSE3, MultiplyHighRoundScale, 16, 2, false, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSSE3, Shuffle, 16, 2, false, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(SSSE3, MultiplyAddAdjacent, 16, 2, false, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSSE3, MultiplyHighRoundScale, 16, 2, false, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSSE3, Shuffle, 16, 2, false, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSSE3, Sign, 16, 2, true, {INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -638,34 +638,34 @@ HARDWARE_INTRINSIC(SSSE3, Sign, // SSE41 Intrinsics HARDWARE_INTRINSIC(SSE41, Blend, 16, 3, true, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE41, BlendVariable, 16, 3, true, {INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_pblendvb, INS_blendvps, INS_blendvpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE41, Ceiling, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41, Ceiling, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE41, CeilingScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE41, CompareEqual, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE41, ConvertToVector128Int16, 16, 1, true, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) HARDWARE_INTRINSIC(SSE41, ConvertToVector128Int32, 16, 1, true, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) HARDWARE_INTRINSIC(SSE41, ConvertToVector128Int64, 16, 1, true, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics|HW_Flag_MaybeMemoryLoad) HARDWARE_INTRINSIC(SSE41, DotProduct, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_dppd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE41, Extract, 16, 2, true, {INS_pextrb, INS_pextrb, INS_invalid, INS_invalid, INS_pextrd, INS_pextrd, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE41, Floor, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41, Extract, 16, 2, true, {INS_pextrb, INS_pextrb, INS_invalid, INS_invalid, INS_pextrd, INS_pextrd, INS_invalid, INS_invalid, INS_extractps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41, Floor, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE41, FloorScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE41, Insert, 16, 3, true, {INS_pinsrb, INS_pinsrb, INS_invalid, INS_invalid, INS_pinsrd, INS_pinsrd, INS_invalid, INS_invalid, INS_insertps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) HARDWARE_INTRINSIC(SSE41, LoadAlignedVector128NonTemporal, 16, 1, false, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics) -HARDWARE_INTRINSIC(SSE41, Max, 16, 2, true, {INS_pmaxsb, INS_invalid, INS_invalid, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(SSE41, Min, 16, 2, true, {INS_pminsb, INS_invalid, INS_invalid, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(SSE41, Max, 16, 2, true, {INS_pmaxsb, INS_invalid, INS_invalid, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) +HARDWARE_INTRINSIC(SSE41, Min, 16, 2, true, {INS_pminsb, INS_invalid, INS_invalid, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) HARDWARE_INTRINSIC(SSE41, MinHorizontal, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_phminposuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE41, MultipleSumAbsoluteDifferences, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(SSE41, Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(SSE41, MultiplyLow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(SSE41, PackUnsignedSaturate, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(SSE41, RoundCurrentDirection, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41, Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE41, MultiplyLow, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE41, PackUnsignedSaturate, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(SSE41, RoundCurrentDirection, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE41, RoundCurrentDirectionScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE41, RoundToNearestInteger, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41, RoundToNearestInteger, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE41, RoundToNearestIntegerScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE41, RoundToNegativeInfinity, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41, RoundToNegativeInfinity, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE41, RoundToNegativeInfinityScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE41, RoundToPositiveInfinity, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41, RoundToPositiveInfinity, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE41, RoundToPositiveInfinityScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(SSE41, RoundToZero, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41, RoundToZero, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(SSE41, RoundToZeroScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundss, INS_roundsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(SSE41, TestC, 16, 2, false, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(SSE41, TestNotZAndNotC, 16, 2, false, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) @@ -676,7 +676,7 @@ HARDWARE_INTRINSIC(SSE41, TestZ, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SSE41 64-bit-only Intrinsics -HARDWARE_INTRINSIC(SSE41_X64, Extract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pextrq, INS_pextrq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MultiIns|HW_Flag_NoRMWSemantics) +HARDWARE_INTRINSIC(SSE41_X64, Extract, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pextrq, INS_pextrq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics) HARDWARE_INTRINSIC(SSE41_X64, Insert, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pinsrq, INS_pinsrq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CanBenefitFromConstantProp) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** @@ -685,8 +685,8 @@ HARDWARE_INTRINSIC(SSE41_X64, Insert, // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // SSE42 Intrinsics HARDWARE_INTRINSIC(SSE42, Crc32, 0, 2, false, {INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_crc32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(SSE42, CompareGreaterThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(SSE42, CompareLessThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(SSE42, CompareGreaterThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(SSE42, CompareLessThan, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -700,91 +700,91 @@ HARDWARE_INTRINSIC(SSE42_X64, Crc32, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX Intrinsics -HARDWARE_INTRINSIC(AVX, Add, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX, Add, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX, AddSubtract, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsubps, INS_addsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, And, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX, AndNot, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX, And, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, AndNot, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX, Blend, 32, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blendps, INS_blendpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, BlendVariable, 32, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vblendvps, INS_vblendvpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, Ceiling, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, Ceiling, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector128, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, BroadcastScalarToVector256, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, BroadcastVector128ToVector256, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcastf128, INS_vbroadcastf128}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Compare, 32, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX, CompareEqual, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX, CompareGreaterThan, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX, CompareGreaterThanOrEqual, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX, CompareLessThan, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX, CompareLessThanOrEqual, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX, CompareNotEqual, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX, CompareNotGreaterThan, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX, CompareNotGreaterThanOrEqual, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX, CompareNotLessThan, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX, CompareNotLessThanOrEqual, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX, CompareOrdered, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX, CompareUnordered, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX, Compare, 32, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, CompareEqual, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, CompareGreaterThan, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, CompareGreaterThanOrEqual, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, CompareLessThan, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, CompareLessThanOrEqual, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, CompareNotEqual, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, CompareNotGreaterThan, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, CompareNotGreaterThanOrEqual, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, CompareNotLessThan, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, CompareNotLessThanOrEqual, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, CompareOrdered, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVX, CompareUnordered, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpps, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, CompareScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpss, INS_cmpsd}, HW_Category_IMM, HW_Flag_CopyUpperBits|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Single, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Single, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Double, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32WithTruncation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32WithTruncation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX, Divide, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, ConvertToVector128Single, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Single, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Double, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, ConvertToVector128Int32WithTruncation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, ConvertToVector256Int32WithTruncation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX, Divide, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX, DotProduct, 32, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, DuplicateEvenIndexed, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, DuplicateOddIndexed, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, ExtractVector128, 32, 2, false, {INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX, Floor, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, ExtractVector128, 32, 2, false, {INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128, INS_vextractf128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, Floor, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX, HorizontalAdd, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_haddps, INS_haddpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, HorizontalSubtract, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, InsertVector128, 32, 3, false, {INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX, InsertVector128, 32, 3, false, {INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128, INS_vinsertf128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX, LoadAlignedVector256, 32, 1, true, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX, LoadDquVector256, 32, 1, false, {INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, LoadVector256, 32, 1, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX, Max, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX, Min, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX, LoadVector256, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX, Max, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, Min, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX, MaskLoad, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, MaskStore, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vmaskmovps, INS_vmaskmovpd}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, MoveMask, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskps, INS_movmskpd}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, Multiply, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX, Or, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX, Permute, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX, Multiply, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, Or, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, Permute, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX, Permute2x128, 32, 3, false, {INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128, INS_vperm2f128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, PermuteVar, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX, PermuteVar, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX, Reciprocal, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rcpps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, ReciprocalSqrt, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_rsqrtps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, RoundCurrentDirection, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, RoundToNearestInteger, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, RoundToNegativeInfinity, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, RoundToPositiveInfinity, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, RoundToZero, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Shuffle, 32, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX, Sqrt, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Store, 32, 2, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(AVX, RoundCurrentDirection, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, RoundToNearestInteger, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, RoundToNegativeInfinity, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, RoundToPositiveInfinity, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, RoundToZero, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, Shuffle, 32, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, Sqrt, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, Store, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVX, StoreAligned, 32, 2, true, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVX, StoreAlignedNonTemporal, 32, 2, true, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX, Subtract, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX, Subtract, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX, TestC, -1, 2, true, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, TestNotZAndNotC, -1, 2, true, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX, TestZ, -1, 2, true, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX, UnpackHigh, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, UnpackLow, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX, Xor, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX, UnpackHigh, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, UnpackLow, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX, Xor, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX2 Intrinsics -HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, true, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX2, Add, 32, 2, true, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, true, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, false, {INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, And, 32, 2, false, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX2, AndNot, 32, 2, false, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX2, Average, 32, 2, true, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, Abs, 32, 1, true, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_pabsd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, Add, 32, 2, true, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, true, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, false, {INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, And, 32, 2, false, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, AndNot, 32, 2, false, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, Average, 32, 2, true, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX2, Blend, -1, 3, true, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_vpblendd, INS_vpblendd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, BlendVariable, 32, 3, false, {INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, BroadcastScalarToVector128, 16, 1, true, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_movddup}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) @@ -793,7 +793,7 @@ HARDWARE_INTRINSIC(AVX2, BroadcastVector128ToVector256, HARDWARE_INTRINSIC(AVX2, CompareEqual, 32, 2, true, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqq, INS_pcmpeqq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, CompareGreaterThan, 32, 2, true, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, CompareLessThan, 32, 2, true, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, false, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, false, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int16, 32, 1, true, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_MaybeMemoryLoad) @@ -807,72 +807,73 @@ HARDWARE_INTRINSIC(AVX2, HorizontalAdd, HARDWARE_INTRINSIC(AVX2, HorizontalAddSaturate, 32, 2, false, {INS_invalid, INS_invalid, INS_phaddsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, HorizontalSubtract, 32, 2, true, {INS_invalid, INS_invalid, INS_phsubw, INS_invalid, INS_phsubd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, HorizontalSubtractSaturate, 32, 2, false, {INS_invalid, INS_invalid, INS_phsubsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, InsertVector128, 32, 3, false, {INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, InsertVector128, 32, 3, false, {INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX2, LoadAlignedVector256NonTemporal, 32, 1, false, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX2, MaskLoad, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, MaskStore, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaskmovd, INS_vpmaskmovd, INS_vpmaskmovq, INS_vpmaskmovq, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoContainment|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Max, 32, 2, true, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX2, Min, 32, 2, true, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX2, Max, 32, 2, true, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_pmaxsd, INS_pmaxud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX2, Min, 32, 2, true, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_pminsd, INS_pminud, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) HARDWARE_INTRINSIC(AVX2, MoveMask, 32, 1, false, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Multiply, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX2, Multiply, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX2, MultipleSumAbsoluteDifferences, 32, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_mpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, MultiplyAddAdjacent, 32, 2, true, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, MultiplyHigh, 32, 2, true, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX2, MultiplyHighRoundScale, 32, 2, false, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, MultiplyLow, 32, 2, true, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX2, Or, 32, 2, false, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX2, MultiplyAddAdjacent, 32, 2, true, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, MultiplyHigh, 32, 2, true, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, MultiplyHighRoundScale, 32, 2, false, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, MultiplyLow, 32, 2, true, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, Or, 32, 2, false, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX2, Permute2x128, 32, 3, false, {INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(AVX2, Permute4x64, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, PermuteVar8x32, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX2, PackSignedSaturate, 32, 2, true, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, PackUnsignedSaturate, 32, 2, true, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical, 32, 2, true, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, Permute4x64, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, PermuteVar8x32, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, PackSignedSaturate, 32, 2, true, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, PackUnsignedSaturate, 32, 2, true, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical, 32, 2, true, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX2, ShiftLeftLogical128BitLane, 32, 2, false, {INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_pslldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShiftLeftLogicalVariable, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX2, ShiftRightArithmetic, 32, 2, true, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShiftRightArithmeticVariable, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravd, INS_vpsravd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX2, ShiftRightLogical, 32, 2, true, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShiftLeftLogicalVariable, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, ShiftRightArithmetic, 32, 2, true, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_psrad, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, ShiftRightArithmeticVariable, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravd, INS_vpsravd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, ShiftRightLogical, 32, 2, true, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX2, ShiftRightLogical128BitLane, 32, 2, false, {INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_psrldq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShiftRightLogicalVariable, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX2, Shuffle, 32, 2, true, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_MaybeIMM) -HARDWARE_INTRINSIC(AVX2, ShuffleHigh, 32, 2, false, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX2, ShuffleLow, 32, 2, false, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX2, ShiftRightLogicalVariable, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, Shuffle, 32, 2, true, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_MaybeIMM) +HARDWARE_INTRINSIC(AVX2, ShuffleHigh, 32, 2, false, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, ShuffleLow, 32, 2, false, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX2, Sign, 32, 2, true, {INS_psignb, INS_invalid, INS_psignw, INS_invalid, INS_psignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX2, SumAbsoluteDifferences, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, Subtract, 32, 2, true, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX2, SubtractSaturate, 32, 2, true, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, true, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, true, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, false, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX2, Subtract, 32, 2, true, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, SubtractSaturate, 32, 2, true, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, true, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, true, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, false, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512F Intrinsics -HARDWARE_INTRINSIC(AVX512F, Abs, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pabsd, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, Add, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512F, Abs, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pabsd, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, Add, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_addps, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, AddScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, AlignRight32, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, AlignRight64, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, And, 64, 2, true, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_vpandq, INS_vpandq, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512F, AndNot, 64, 2, true, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512F, BlendVariable, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(AVX512F, AlignRight32, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, AlignRight64, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, And, 64, 2, true, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_vpandq, INS_vpandq, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, AndNot, 64, 2, true, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, BlendVariable, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(AVX512F, BroadcastScalarToVector512, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX512F, BroadcastVector128ToVector512, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti128, INS_vbroadcasti128, INS_invalid, INS_invalid, INS_vbroadcastf128, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeMemoryLoad) HARDWARE_INTRINSIC(AVX512F, BroadcastVector256ToVector512, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti64x4, INS_vbroadcasti64x4, INS_invalid, INS_vbroadcastf64x4}, HW_Category_SimpleSIMD, HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX512F, CompareEqual, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512F, CompareGreaterThan, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanOrEqual, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F, CompareLessThan, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F, CompareLessThanOrEqual, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F, CompareNotEqual, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThan, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThanOrEqual, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F, CompareNotLessThan, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F, CompareNotLessThanOrEqual, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F, CompareOrdered, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F, CompareUnordered, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(AVX512F, Compare, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, CompareEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, CompareGreaterThan, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanOrEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, CompareLessThan, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, CompareLessThanOrEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, CompareNotEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThan, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThanOrEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, CompareNotLessThan, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, CompareNotLessThanOrEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, CompareOrdered, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, CompareUnordered, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Double, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd32, INS_vcvtusi2sd32, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits) HARDWARE_INTRINSIC(AVX512F, ConvertScalarToVector128Single, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2ss32, INS_vcvtusi2ss32, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, ConvertToInt32, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2si, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) @@ -890,149 +891,149 @@ HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int16, HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int16WithSaturation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32WithSaturation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32WithTruncation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Single, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Int32WithTruncation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512F, ConvertToVector256Single, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt16, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt16WithSaturation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32WithSaturation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32WithTruncation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Double, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32, 64, -1, false, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32WithTruncation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512F, ConvertToVector256UInt32WithTruncation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Double, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32, 64, -1, false, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int32WithTruncation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Int64, 64, 1, true, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Single, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32, 64, -1, false, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32WithTruncation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512F, ConvertToVector512Single, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32, 64, -1, false, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt32WithTruncation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512F, ConvertToVector512UInt64, 64, 1, true, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F, Divide, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512F, Divide, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divps, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, DivideScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divss, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, DuplicateEvenIndexed, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX512F, DuplicateOddIndexed, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, ExtractVector128, 64, 2, true, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextractf128, INS_vextractf128}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, ExtractVector256, 64, 2, true, {INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf64x4, INS_vextractf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, Fixup, 64, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512F, ExtractVector128, 64, 2, true, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextractf128, INS_vextractf128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, ExtractVector256, 64, 2, true, {INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf64x4, INS_vextractf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, Fixup, 64, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512F, FixupScalar, 16, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmss, INS_vfixupimmsd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAdd, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddNegated, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddNegatedScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddSubtract, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtract, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractAdd, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractNegated, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractNegatedScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, GetExponent, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAdd, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddNegated, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddNegatedScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAddSubtract, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtract, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractAdd, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractNegated, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, FusedMultiplySubtractNegatedScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(AVX512F, GetExponent, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512F, GetExponentScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpss, INS_vgetexpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, GetMantissa, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512F, GetMantissa, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512F, GetMantissaScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantss, INS_vgetmantsd}, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, InsertVector128, 64, 3, true, {INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinsertf128, INS_vinsertf128}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, InsertVector256, 64, 3, true, {INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf64x4, INS_vinsertf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512F, InsertVector128, 64, 3, true, {INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinsertf128, INS_vinsertf128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, InsertVector256, 64, 3, true, {INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf64x4, INS_vinsertf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512, 64, 1, true, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512NonTemporal, 64, 1, false, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, LoadVector512, 64, 1, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F, Max, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmaxsd, INS_pmaxud, INS_vpmaxsq, INS_vpmaxuq, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512F, Min, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pminsd, INS_pminud, INS_vpminsq, INS_vpminuq, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512F, Multiply, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512F, LoadVector512, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F, Max, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmaxsd, INS_pmaxud, INS_vpmaxsq, INS_vpmaxuq, INS_maxps, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX512F, Min, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pminsd, INS_pminud, INS_vpminsq, INS_vpminuq, INS_minps, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_MaybeCommutative) +HARDWARE_INTRINSIC(AVX512F, Multiply, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, MultiplyScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, MultiplyLow, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512F, Or, 64, 2, true, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_vporq, INS_vporq, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512F, Permute2x64, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, Permute4x32, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, Permute4x64, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, PermuteVar2x64, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, PermuteVar4x32, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, PermuteVar8x64, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512F, PermuteVar8x64x2, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512F, PermuteVar16x32, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512F, PermuteVar16x32x2, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512F, Reciprocal14, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512F, MultiplyLow, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, Or, 64, 2, true, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_vporq, INS_vporq, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, Permute2x64, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, Permute4x32, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, Permute4x64, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, PermuteVar2x64, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpdvar}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, PermuteVar4x32, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpsvar, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, PermuteVar8x64, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, PermuteVar8x64x2, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, PermuteVar16x32, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermd, INS_vpermd, INS_invalid, INS_invalid, INS_vpermps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, PermuteVar16x32x2, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, Reciprocal14, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512F, Reciprocal14Scalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ss, INS_vrcp14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, ReciprocalSqrt14, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512F, ReciprocalSqrt14, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512F, ReciprocalSqrt14Scalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ss, INS_vrsqrt14sd}, HW_Category_SimpleSIMD, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, RotateLeft, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, RotateLeftVariable, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, RotateRight, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, RotateRightVariable, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, RoundScale, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512F, RotateLeft, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, RotateLeftVariable, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, RotateRight, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, RotateRightVariable, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, RoundScale, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512F, RoundScaleScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaless, INS_vrndscalesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512F, Scale, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512F, Scale, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, ScaleScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefss, INS_vscalefsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, ShiftLeftLogical, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, ShiftLeftLogicalVariable, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512F, ShiftRightArithmetic, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrad, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, ShiftRightArithmeticVariable, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravd, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512F, ShiftRightLogical, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, ShiftRightLogicalVariable, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512F, Shuffle, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, Shuffle4x128, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, Sqrt, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512F, ShiftLeftLogical, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, ShiftLeftLogicalVariable, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsllvd, INS_vpsllvd, INS_vpsllvq, INS_vpsllvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, ShiftRightArithmetic, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrad, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, ShiftRightArithmeticVariable, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravd, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, ShiftRightLogical, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psrld, INS_psrld, INS_psrlq, INS_psrlq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, ShiftRightLogicalVariable, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsrlvd, INS_vpsrlvd, INS_vpsrlvq, INS_vpsrlvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, Shuffle, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pshufd, INS_pshufd, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, Shuffle4x128, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, Sqrt, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, SqrtScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, Store, 64, 2, true, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_vmovdqu64, INS_vmovdqu64, INS_movups, INS_movupd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(AVX512F, Store, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVX512F, StoreAligned, 64, 2, true, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) HARDWARE_INTRINSIC(AVX512F, StoreAlignedNonTemporal, 64, 2, true, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVX512F, Subtract, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512F, Subtract, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) HARDWARE_INTRINSIC(AVX512F, SubtractScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512F, UnpackHigh, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, TernaryLogic, 64, 4, true, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F, UnpackLow, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, Xor, 64, 2, true, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, TernaryLogic, 64, 4, true, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, UnpackHigh, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, UnpackLow, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F, Xor, 64, 2, true, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512F.VL Intrinsics -HARDWARE_INTRINSIC(AVX512F_VL, Abs, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, AlignRight32, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, AlignRight64, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, Max, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaxsq, INS_vpmaxuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, Min, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpminsq, INS_vpminuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, CompareGreaterThan, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpcmpud, INS_invalid, INS_vpcmpuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, CompareGreaterThanOrEqual, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, CompareLessThan, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, CompareLessThanOrEqual, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, CompareNotEqual, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512F_VL, Abs, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpabsq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, AlignRight32, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, AlignRight64, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, Max, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaxsq, INS_vpmaxuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512F_VL, Min, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpminsq, INS_vpminuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512F_VL, CompareGreaterThan, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F_VL, CompareGreaterThanOrEqual, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F_VL, CompareLessThan, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F_VL, CompareLessThanOrEqual, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512F_VL, CompareNotEqual, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Byte, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128ByteWithSaturation, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdb, INS_invalid, INS_vpmovusqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Double, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Double, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int16, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int16WithSaturation, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdw, INS_invalid, INS_vpmovsqw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Int32WithSaturation, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsqd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128SByte, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdb, INS_vpmovdb, INS_vpmovqb, INS_vpmovqb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128SByteWithSaturation, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovsdb, INS_invalid, INS_vpmovsqb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Single, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) +HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128Single, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt16, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovdw, INS_vpmovdw, INS_vpmovqw, INS_vpmovqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt16WithSaturation, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusdw, INS_invalid, INS_vpmovusqw, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovqd, INS_vpmovqd, INS_vcvtps2udq, INS_vcvtpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32WithSaturation, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmovusqd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32WithTruncation, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256Double, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256Single, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256UInt32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256UInt32WithTruncation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512F_VL, Fixup, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, GetExponent, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, GetMantissa, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar2x64x2, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x32x2, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x64, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x64x2, 32, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar8x32x2, 32, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512F_VL, Reciprocal14, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, ReciprocalSqrt14, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, RotateLeft, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, RotateLeftVariable, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, RotateRight, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, RotateRightVariable, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, RoundScale, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, Scale, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmetic, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmeticVariable, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512F_VL, Shuffle2x128, 32, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512F_VL, TernaryLogic, -1, 4, true, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector128UInt32WithTruncation, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_vcvttpd2udq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256Double, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2pd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256Single, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtudq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256UInt32, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, ConvertToVector256UInt32WithTruncation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2udq, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, Fixup, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, GetExponent, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetexpps, INS_vgetexppd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, GetMantissa, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar2x64x2, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x32x2, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x64, 32, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq_reg, INS_vpermq_reg, INS_invalid, INS_vpermpd_reg}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar4x64x2, 32, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2q, INS_vpermt2q, INS_invalid, INS_vpermt2pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, PermuteVar8x32x2, 32, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermt2d, INS_vpermt2d, INS_invalid, INS_invalid, INS_vpermt2ps, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, Reciprocal14, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrcp14ps, INS_vrcp14pd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, ReciprocalSqrt14, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrsqrt14ps, INS_vrsqrt14pd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, RotateLeft, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprold, INS_vprold, INS_vprolq, INS_vprolq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, RotateLeftVariable, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprolvd, INS_vprolvd, INS_vprolvq, INS_vprolvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, RotateRight, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprord, INS_vprord, INS_vprorq, INS_vprorq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_MaybeNoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, RotateRightVariable, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vprorvd, INS_vprorvd, INS_vprorvq, INS_vprorvq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, RoundScale, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrndscaleps, INS_vrndscalepd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, Scale, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vscalefps, INS_vscalefpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmetic, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmeticVariable, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, Shuffle2x128, 32, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512F_VL, TernaryLogic, -1, 4, true, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -1050,121 +1051,121 @@ HARDWARE_INTRINSIC(AVX512F_X64, ConvertToUInt64WithTruncation, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512BW Intrinsics -HARDWARE_INTRINSIC(AVX512BW, Abs, 64, 1, true, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512BW, Add, 64, 2, true, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, AddSaturate, 64, 2, true, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, AlignRight, 64, 3, false, {INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, Average, 64, 2, true, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, BlendVariable, 64, 3, true, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) +HARDWARE_INTRINSIC(AVX512BW, Abs, 64, 1, true, {INS_pabsb, INS_invalid, INS_pabsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, Add, 64, 2, true, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, AddSaturate, 64, 2, true, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, AlignRight, 64, 3, false, {INS_palignr, INS_palignr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, Average, 64, 2, true, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, BlendVariable, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(AVX512BW, BroadcastScalarToVector512, 64, 1, true, {INS_vpbroadcastb, INS_vpbroadcastb, INS_vpbroadcastw, INS_vpbroadcastw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_MaybeMemoryLoad) -HARDWARE_INTRINSIC(AVX512BW, CompareEqual, 64, 2, true, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, CompareGreaterThan, 64, 2, true, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512BW, CompareGreaterThanOrEqual, 64, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512BW, CompareLessThan, 64, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512BW, CompareLessThanOrEqual, 64, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512BW, CompareNotEqual, 64, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512BW, CompareEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512BW, CompareGreaterThan, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512BW, CompareGreaterThanOrEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512BW, CompareLessThan, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512BW, CompareLessThanOrEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512BW, CompareNotEqual, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256Byte, 64, 1, false, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256ByteWithSaturation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256SByte, 64, 1, false, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512BW, ConvertToVector256SByteWithSaturation, 64, 1, false, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512BW, ConvertToVector512Int16, 64, 1, true, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) HARDWARE_INTRINSIC(AVX512BW, ConvertToVector512UInt16, 64, 1, true, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512BW, LoadVector512, 64, 1, true, {INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512BW, Max, 64, 2, true, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, Min, 64, 2, true, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, MultiplyAddAdjacent, 64, 2, true, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, MultiplyHigh, 64, 2, true, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, MultiplyHighRoundScale, 64, 2, false, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, MultiplyLow, 64, 2, false, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512BW, PackSignedSaturate, 64, 2, true, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, PackUnsignedSaturate, 64, 2, true, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, PermuteVar32x16, 64, 2, false, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512BW, PermuteVar32x16x2, 64, 3, false, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogical, 64, 2, false, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512BW, LoadVector512, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512BW, Max, 64, 2, true, {INS_pmaxsb, INS_pmaxub, INS_pmaxsw, INS_pmaxuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, Min, 64, 2, true, {INS_pminsb, INS_pminub, INS_pminsw, INS_pminuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, MultiplyAddAdjacent, 64, 2, true, {INS_invalid, INS_invalid, INS_pmaddubsw, INS_invalid, INS_pmaddwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, MultiplyHigh, 64, 2, true, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, MultiplyHighRoundScale, 64, 2, false, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, MultiplyLow, 64, 2, false, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, PackSignedSaturate, 64, 2, true, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, PackUnsignedSaturate, 64, 2, true, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, PermuteVar32x16, 64, 2, false, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, PermuteVar32x16x2, 64, 3, false, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogical, 64, 2, false, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogical128BitLane, 64, 2, false, {INS_pslldq, INS_pslldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogicalVariable, 64, 2, false, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightArithmetic, 64, 2, false, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightArithmeticVariable, 64, 2, false, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogical, 64, 2, false, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512BW, ShiftLeftLogicalVariable, 64, 2, false, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, ShiftRightArithmetic, 64, 2, false, {INS_invalid, INS_invalid, INS_psraw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, ShiftRightArithmeticVariable, 64, 2, false, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogical, 64, 2, false, {INS_invalid, INS_invalid, INS_psrlw, INS_psrlw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogical128BitLane, 64, 2, false, {INS_psrldq, INS_psrldq, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogicalVariable, 64, 2, false, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, Shuffle, 64, 2, false, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, ShuffleHigh, 64, 2, false, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, ShuffleLow, 64, 2, false, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, Store, 64, 2, true, {INS_vmovdqu8, INS_vmovdqu8, INS_vmovdqu16, INS_vmovdqu16, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512BW, Subtract, 64, 2, true, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, SubtractSaturate, 64, 2, true, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512BW, ShiftRightLogicalVariable, 64, 2, false, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, Shuffle, 64, 2, false, {INS_pshufb, INS_pshufb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, ShuffleHigh, 64, 2, false, {INS_invalid, INS_invalid, INS_pshufhw, INS_pshufhw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, ShuffleLow, 64, 2, false, {INS_invalid, INS_invalid, INS_pshuflw, INS_pshuflw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, Store, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVX512BW, Subtract, 64, 2, true, {INS_psubb, INS_psubb, INS_psubw, INS_psubw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, SubtractSaturate, 64, 2, true, {INS_psubsb, INS_psubusb, INS_psubsw, INS_psubusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512BW, SumAbsoluteDifferences, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, SumAbsoluteDifferencesInBlock32, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512BW, UnpackHigh, 64, 2, true, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW, UnpackLow, 64, 2, true, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512BW, SumAbsoluteDifferencesInBlock32, 64, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, UnpackHigh, 64, 2, true, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW, UnpackLow, 64, 2, true, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512BW.VL Intrinsics -HARDWARE_INTRINSIC(AVX512BW_VL, CompareGreaterThan, -1, 2, true, {INS_invalid, INS_vpcmpub, INS_invalid, INS_vpcmpuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, CompareGreaterThanOrEqual, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, CompareLessThan, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, CompareLessThanOrEqual, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, CompareNotEqual, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative) +HARDWARE_INTRINSIC(AVX512BW_VL, CompareGreaterThan, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512BW_VL, CompareGreaterThanOrEqual, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512BW_VL, CompareLessThan, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512BW_VL, CompareLessThanOrEqual, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) +HARDWARE_INTRINSIC(AVX512BW_VL, CompareNotEqual, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId) HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128Byte, -1, 1, false, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128ByteWithSaturation, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_vpmovuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128SByte, -1, 1, false, {INS_invalid, INS_invalid, INS_vpmovwb, INS_vpmovwb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(AVX512BW_VL, ConvertToVector128SByteWithSaturation, -1, 1, false, {INS_invalid, INS_invalid, INS_vpmovswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar8x16 , 16, 2, false, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar8x16x2, 16, 3, false, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar16x16, 32, 2, false, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar16x16x2, 32, 3, false, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512BW_VL, ShiftLeftLogicalVariable, -1, 2, false, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW_VL, ShiftRightArithmeticVariable, -1, 2, false, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW_VL, ShiftRightLogicalVariable, -1, 2, false, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512BW_VL, SumAbsoluteDifferencesInBlock32, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar8x16 , 16, 2, false, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar8x16x2, 16, 3, false, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar16x16, 32, 2, false, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW_VL, PermuteVar16x16x2, 32, 3, false, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW_VL, ShiftLeftLogicalVariable, -1, 2, false, {INS_invalid, INS_invalid, INS_vpsllvw, INS_vpsllvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW_VL, ShiftRightArithmeticVariable, -1, 2, false, {INS_invalid, INS_invalid, INS_vpsravw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW_VL, ShiftRightLogicalVariable, -1, 2, false, {INS_invalid, INS_invalid, INS_vpsrlvw, INS_vpsrlvw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512BW_VL, SumAbsoluteDifferencesInBlock32, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512CD Intrinsics -HARDWARE_INTRINSIC(AVX512CD, DetectConflicts, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512CD, LeadingZeroCount, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512CD, DetectConflicts, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512CD, LeadingZeroCount, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512CD.VL Intrinsics -HARDWARE_INTRINSIC(AVX512CD_VL, DetectConflicts, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512CD_VL, LeadingZeroCount, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512CD_VL, DetectConflicts, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpconflictd, INS_vpconflictd, INS_vpconflictq, INS_vpconflictq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512CD_VL, LeadingZeroCount, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512DQ Intrinsics -HARDWARE_INTRINSIC(AVX512DQ, And, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative) -HARDWARE_INTRINSIC(AVX512DQ, AndNot, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512DQ, And, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, AndNot, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512DQ, BroadcastPairScalarToVector512, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX512DQ, BroadcastVector128ToVector512, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti64x2, INS_vbroadcasti64x2, INS_invalid, INS_vbroadcastf64x2}, HW_Category_MemoryLoad, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX512DQ, BroadcastVector256ToVector512, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x8, INS_vbroadcasti32x8, INS_invalid, INS_invalid, INS_vbroadcastf32x8, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector256Single, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Double, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64WithTruncation, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbRoundingCompatible) -HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64WithTruncation, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ, ExtractVector128, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti64x2, INS_vextracti64x2, INS_invalid, INS_vextractf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512DQ, ExtractVector256, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_invalid, INS_invalid, INS_vextractf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512DQ, InsertVector128, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti64x2, INS_vinserti64x2, INS_invalid, INS_vinsertf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512DQ, InsertVector256, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_invalid, INS_invalid, INS_vinsertf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512DQ, MultiplyLow, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512DQ, Or, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) -HARDWARE_INTRINSIC(AVX512DQ, Range, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector256Single, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Double, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512Int64WithTruncation, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64, 64, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, ConvertToVector512UInt64WithTruncation, 64, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, ExtractVector128, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti64x2, INS_vextracti64x2, INS_invalid, INS_vextractf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, ExtractVector256, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vextracti32x8, INS_vextracti32x8, INS_invalid, INS_invalid, INS_vextractf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, InsertVector128, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti64x2, INS_vinserti64x2, INS_invalid, INS_vinsertf64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, InsertVector256, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vinserti32x8, INS_vinserti32x8, INS_invalid, INS_invalid, INS_vinsertf32x8, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, MultiplyLow, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, Or, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ, Range, 64, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512DQ, RangeScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangess, INS_vrangesd}, HW_Category_IMM, HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512DQ, Reduce, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) +HARDWARE_INTRINSIC(AVX512DQ, Reduce, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) HARDWARE_INTRINSIC(AVX512DQ, ReduceScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreducess, INS_vreducesd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits) -HARDWARE_INTRINSIC(AVX512DQ, Xor, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512DQ, Xor, 64, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -1173,46 +1174,46 @@ HARDWARE_INTRINSIC(AVX512DQ, Xor, // AVX512DQ.VL Intrinsics HARDWARE_INTRINSIC(AVX512DQ_VL, BroadcastPairScalarToVector128, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) HARDWARE_INTRINSIC(AVX512DQ_VL, BroadcastPairScalarToVector256, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti32x2, INS_vbroadcasti32x2, INS_invalid, INS_invalid, INS_vbroadcastf32x2, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Double, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Int64, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Int64WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Single, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128UInt64, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128UInt64WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Double, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Int64, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Int64WithTruncation, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256UInt64, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256UInt64WithTruncation, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg) -HARDWARE_INTRINSIC(AVX512DQ_VL, Range, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512DQ_VL, Reduce, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM) -HARDWARE_INTRINSIC(AVX512DQ_VL, MultiplyLow, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Double, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Int64, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Int64WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128Single, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2ps, INS_vcvtuqq2ps, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128UInt64, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector128UInt64WithTruncation, 16, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Double, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtqq2pd, INS_vcvtuqq2pd, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Int64, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2qq, INS_vcvtpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256Int64WithTruncation, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2qq, INS_vcvttpd2qq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256UInt64, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvtps2uqq, INS_vcvtpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, ConvertToVector256UInt64WithTruncation, 32, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcvttps2uqq, INS_vcvttpd2uqq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, Range, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vrangeps, INS_vrangepd}, HW_Category_IMM, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, Reduce, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vreduceps, INS_vreducepd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512DQ_VL, MultiplyLow, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512VBMI Intrinsics -HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8, 64, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8x2, 64, 3, false, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8, 64, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512VBMI, PermuteVar64x8x2, 64, 3, false, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVX512VBMI.VL Intrinsics -HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8, 16, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8x2, 16, 3, false, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar32x8, 32, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport) -HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar32x8x2, 32, 3, false, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8, 16, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar16x8x2, 16, 3, false, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar32x8, 32, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(AVX512VBMI_VL, PermuteVar32x8x2, 32, 3, false, {INS_vpermt2b, INS_vpermt2b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // AVXVNNI Intrinsics -HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, true, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg) -HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, true, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg) +HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, true, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, true, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromSecondArg|HW_Flag_NoEvexSemantics) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -1235,8 +1236,8 @@ HARDWARE_INTRINSIC(BMI1, AndNot, HARDWARE_INTRINSIC(BMI1, ExtractLowestSetBit, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(BMI1, GetMaskUpToLowestSetBit, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsmsk, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) HARDWARE_INTRINSIC(BMI1, ResetLowestSetBit, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(BMI1, TrailingZeroCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_MultiIns) -HARDWARE_INTRINSIC(BMI1, BitFieldExtract, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bextr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_MultiIns|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(BMI1, TrailingZeroCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(BMI1, BitFieldExtract, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bextr, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -1247,8 +1248,8 @@ HARDWARE_INTRINSIC(BMI1_X64, AndNot, HARDWARE_INTRINSIC(BMI1_X64, ExtractLowestSetBit, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsi, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(BMI1_X64, GetMaskUpToLowestSetBit, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsmsk, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) HARDWARE_INTRINSIC(BMI1_X64, ResetLowestSetBit, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_blsr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed) -HARDWARE_INTRINSIC(BMI1_X64, TrailingZeroCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_MultiIns) -HARDWARE_INTRINSIC(BMI1_X64, BitFieldExtract, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bextr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_MultiIns|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(BMI1_X64, TrailingZeroCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_tzcnt, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(BMI1_X64, BitFieldExtract, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bextr, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -1258,7 +1259,7 @@ HARDWARE_INTRINSIC(BMI1_X64, BitFieldExtract, HARDWARE_INTRINSIC(BMI2, ParallelBitDeposit, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(BMI2, ParallelBitExtract, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(BMI2, ZeroHighBits, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(BMI2, MultiplyNoFlags, 0, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(BMI2, MultiplyNoFlags, 0, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -1268,37 +1269,37 @@ HARDWARE_INTRINSIC(BMI2, MultiplyNoFlags, HARDWARE_INTRINSIC(BMI2_X64, ParallelBitDeposit, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pdep, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(BMI2_X64, ParallelBitExtract, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pext, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(BMI2_X64, ZeroHighBits, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_bzhi, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_SpecialImport|HW_Flag_NoEvexSemantics) -HARDWARE_INTRINSIC(BMI2_X64, MultiplyNoFlags, 0, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_MultiIns|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) +HARDWARE_INTRINSIC(BMI2_X64, MultiplyNoFlags, 0, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulx, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoContainment|HW_Flag_MaybeMemoryStore|HW_Flag_SpecialCodeGen|HW_Flag_NoFloatingPointUsed|HW_Flag_NoEvexSemantics) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // FMA Intrinsics -HARDWARE_INTRINSIC(FMA, MultiplyAdd, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(FMA, MultiplyAddNegated, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(FMA, MultiplyAddNegatedScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(FMA, MultiplyAddScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(FMA, MultiplyAddSubtract, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(FMA, MultiplySubtract, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(FMA, MultiplySubtractAdd, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(FMA, MultiplySubtractNegated, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(FMA, MultiplySubtractScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) -HARDWARE_INTRINSIC(FMA, MultiplySubtractNegatedScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_CopyUpperBits|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic) +HARDWARE_INTRINSIC(FMA, MultiplyAdd, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(FMA, MultiplyAddNegated, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(FMA, MultiplyAddNegatedScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(FMA, MultiplyAddScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(FMA, MultiplyAddSubtract, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(FMA, MultiplySubtract, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(FMA, MultiplySubtractAdd, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(FMA, MultiplySubtractNegated, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible) +HARDWARE_INTRINSIC(FMA, MultiplySubtractScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) +HARDWARE_INTRINSIC(FMA, MultiplySubtractNegatedScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // LZCNT Intrinsics -HARDWARE_INTRINSIC(LZCNT, LeadingZeroCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns) +HARDWARE_INTRINSIC(LZCNT, LeadingZeroCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // LZCNT Intrinsics -HARDWARE_INTRINSIC(LZCNT_X64, LeadingZeroCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns) +HARDWARE_INTRINSIC(LZCNT_X64, LeadingZeroCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_lzcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -1312,14 +1313,14 @@ HARDWARE_INTRINSIC(PCLMULQDQ, CarrylessMultiply, // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // POPCNT Intrinsics -HARDWARE_INTRINSIC(POPCNT, PopCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns) +HARDWARE_INTRINSIC(POPCNT, PopCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags // {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE} // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // POPCNT Intrinsics -HARDWARE_INTRINSIC(POPCNT_X64, PopCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns) +HARDWARE_INTRINSIC(POPCNT_X64, PopCount, 0, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_popcnt, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen) // *************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************** // ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags @@ -1342,25 +1343,26 @@ HARDWARE_INTRINSIC(SSE41, PTEST, HARDWARE_INTRINSIC(AVX, PTEST, 0, 2, false, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics) HARDWARE_INTRINSIC(AVX512F, KORTEST, 0, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) HARDWARE_INTRINSIC(AVX512F, KTEST, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) -HARDWARE_INTRINSIC(AVX512F, PTESTM, 0, 2, false, {INS_vptestmb, INS_vptestmb, INS_vptestmw, INS_vptestmw, INS_vptestmd, INS_vptestmd, INS_vptestmq, INS_vptestmq, INS_vptestmd, INS_vptestmq}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, PTESTNM, 0, 2, false, {INS_vptestnmb, INS_vptestnmb, INS_vptestnmw, INS_vptestnmw, INS_vptestnmd, INS_vptestnmd, INS_vptestnmq, INS_vptestnmq, INS_vptestnmd, INS_vptestnmq}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) +HARDWARE_INTRINSIC(AVX512F, PTESTM, 0, 2, false, {INS_vptestmb, INS_vptestmb, INS_vptestmw, INS_vptestmw, INS_vptestmd, INS_vptestmd, INS_vptestmq, INS_vptestmq, INS_vptestmd, INS_vptestmq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, PTESTNM, 0, 2, false, {INS_vptestnmb, INS_vptestnmb, INS_vptestnmw, INS_vptestnmw, INS_vptestnmd, INS_vptestnmd, INS_vptestnmq, INS_vptestnmq, INS_vptestnmd, INS_vptestnmq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) HARDWARE_INTRINSIC(AVX512F, AddMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX512F, AndMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX512F, AndNotMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512F, BlendVariableMask, -1, 3, true, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag) -HARDWARE_INTRINSIC(AVX512F, CompareEqualMask, -1, 2, true, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanMask, -1, 2, true, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512F, CompareLessThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512F, CompareLessThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512F, CompareNotEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_CanBenefitFromConstantProp) -HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512F, CompareNotLessThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512F, CompareNotLessThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512F, CompareOrderedMask, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(AVX512F, CompareUnorderedMask, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(AVX512F, BlendVariableMask, -1, 3, true, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, CompareMask, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, CompareEqualMask, -1, 2, true, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanMask, -1, 2, true, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, CompareGreaterThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, CompareLessThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, CompareLessThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, CompareNotEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp) +HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, CompareNotGreaterThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, CompareNotLessThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, CompareNotLessThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, CompareOrderedMask, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) +HARDWARE_INTRINSIC(AVX512F, CompareUnorderedMask, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible) HARDWARE_INTRINSIC(AVX512F, ConvertMaskToVector, -1, 1, true, {INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX512F, ConvertVectorToMask, -1, 1, true, {INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(AVX512F, MoveMask, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 553e1bdf78366a..19062dde2d7b77 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -301,6 +301,8 @@ int HWIntrinsicInfo::lookupImmUpperBound(NamedIntrinsic id) { case NI_AVX_Compare: case NI_AVX_CompareScalar: + case NI_AVX512F_Compare: + case NI_AVX512F_CompareMask: { assert(!HWIntrinsicInfo::HasFullRangeImm(id)); return 31; // enum FloatComparisonMode has 32 values @@ -604,18 +606,11 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim { if (varTypeIsFloating(simdBaseType)) { - if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - return static_cast(FloatComparisonMode::OrderedGreaterThanSignaling); - } - // CompareGreaterThan is not directly supported in hardware without AVX support. - // We will return the inverted case here and lowering will itself swap the ops - // to ensure the emitted code remains correct. This simplifies the overall logic - // here and for other use cases. + // Lowering ensures we swap the operands and change to the correct ID. - assert(id != NI_AVX_CompareGreaterThan); - return static_cast(FloatComparisonMode::OrderedLessThanSignaling); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); + return static_cast(FloatComparisonMode::OrderedGreaterThanSignaling); } else if ((id == NI_AVX512F_CompareGreaterThanMask) && varTypeIsUnsigned(simdBaseType)) { @@ -653,18 +648,11 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim { if (varTypeIsFloating(simdBaseType)) { - if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - return static_cast(FloatComparisonMode::OrderedGreaterThanOrEqualSignaling); - } - // CompareGreaterThanOrEqual is not directly supported in hardware without AVX support. - // We will return the inverted case here and lowering will itself swap the ops - // to ensure the emitted code remains correct. This simplifies the overall logic - // here and for other use cases. + // Lowering ensures we swap the operands and change to the correct ID. - assert(id != NI_AVX_CompareGreaterThanOrEqual); - return static_cast(FloatComparisonMode::OrderedLessThanOrEqualSignaling); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); + return static_cast(FloatComparisonMode::OrderedGreaterThanOrEqualSignaling); } else { @@ -721,18 +709,11 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim { if (varTypeIsFloating(simdBaseType)) { - if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - return static_cast(FloatComparisonMode::UnorderedNotGreaterThanSignaling); - } - // CompareNotGreaterThan is not directly supported in hardware without AVX support. - // We will return the inverted case here and lowering will itself swap the ops - // to ensure the emitted code remains correct. This simplifies the overall logic - // here and for other use cases. + // Lowering ensures we swap the operands and change to the correct ID. - assert(id != NI_AVX_CompareGreaterThan); - return static_cast(FloatComparisonMode::UnorderedNotLessThanSignaling); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); + return static_cast(FloatComparisonMode::UnorderedNotGreaterThanSignaling); } else { @@ -770,18 +751,11 @@ int HWIntrinsicInfo::lookupIval(Compiler* comp, NamedIntrinsic id, var_types sim { if (varTypeIsFloating(simdBaseType)) { - if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX)) - { - return static_cast(FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling); - } - // CompareNotGreaterThanOrEqual is not directly supported in hardware without AVX support. - // We will return the inverted case here and lowering will itself swap the ops - // to ensure the emitted code remains correct. This simplifies the overall logic - // here and for other use cases. + // Lowering ensures we swap the operands and change to the correct ID. - assert(id != NI_AVX_CompareNotGreaterThanOrEqual); - return static_cast(FloatComparisonMode::UnorderedNotLessThanOrEqualSignaling); + assert(comp->compIsaSupportedDebugOnly(InstructionSet_AVX)); + return static_cast(FloatComparisonMode::UnorderedNotGreaterThanOrEqualSignaling); } else { @@ -2027,9 +2001,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, if (IsBaselineVector512IsaSupportedOpportunistically()) { op1 = impSIMDPopStack(); - op1 = - gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_AVX512F_ConvertVectorToMask, simdBaseJitType, simdSize); + if (!varTypeIsMask(op1)) + { + op1 = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_AVX512F_ConvertVectorToMask, simdBaseJitType, + simdSize); + } retNode = gtNewSimdHWIntrinsicNode(retType, op1, NI_AVX512F_MoveMask, simdBaseJitType, simdSize); } break; @@ -3399,6 +3376,38 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impCloneExpr(op1, &clonedOp1, CHECK_SPILL_ALL, nullptr DEBUGARG("Clone op1 for Sse.CompareScalarGreaterThan")); + switch (intrinsic) + { + case NI_SSE_CompareScalarGreaterThan: + { + intrinsic = NI_SSE_CompareScalarLessThan; + break; + } + + case NI_SSE_CompareScalarGreaterThanOrEqual: + { + intrinsic = NI_SSE_CompareScalarLessThanOrEqual; + break; + } + + case NI_SSE_CompareScalarNotGreaterThan: + { + intrinsic = NI_SSE_CompareScalarNotLessThan; + break; + } + + case NI_SSE_CompareScalarNotGreaterThanOrEqual: + { + intrinsic = NI_SSE_CompareScalarNotLessThanOrEqual; + break; + } + + default: + { + unreached(); + } + } + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, simdBaseJitType, simdSize); retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE_MoveScalar, simdBaseJitType, simdSize); @@ -3458,6 +3467,38 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op1 = impCloneExpr(op1, &clonedOp1, CHECK_SPILL_ALL, nullptr DEBUGARG("Clone op1 for Sse2.CompareScalarGreaterThan")); + switch (intrinsic) + { + case NI_SSE2_CompareScalarGreaterThan: + { + intrinsic = NI_SSE2_CompareScalarLessThan; + break; + } + + case NI_SSE2_CompareScalarGreaterThanOrEqual: + { + intrinsic = NI_SSE2_CompareScalarLessThanOrEqual; + break; + } + + case NI_SSE2_CompareScalarNotGreaterThan: + { + intrinsic = NI_SSE2_CompareScalarNotLessThan; + break; + } + + case NI_SSE2_CompareScalarNotGreaterThanOrEqual: + { + intrinsic = NI_SSE2_CompareScalarNotLessThanOrEqual; + break; + } + + default: + { + unreached(); + } + } + retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, op1, intrinsic, simdBaseJitType, simdSize); retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, clonedOp1, retNode, NI_SSE2_MoveScalar, simdBaseJitType, simdSize); @@ -4093,12 +4134,32 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, op2 = impSIMDPopStack(); op1 = impSIMDPopStack(); - op3 = gtNewSimdHWIntrinsicNode(TYP_MASK, op3, NI_AVX512F_ConvertVectorToMask, simdBaseJitType, simdSize); + if (!varTypeIsMask(op3)) + { + op3 = + gtNewSimdHWIntrinsicNode(TYP_MASK, op3, NI_AVX512F_ConvertVectorToMask, simdBaseJitType, simdSize); + } retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, op3, NI_AVX512F_BlendVariableMask, simdBaseJitType, simdSize); break; } + case NI_AVX512F_Compare: + { + assert(sig->numArgs == 3); + + op3 = impPopStack().val; + op2 = impSIMDPopStack(); + op1 = impSIMDPopStack(); + + retNode = + gtNewSimdHWIntrinsicNode(TYP_MASK, op1, op2, op3, NI_AVX512F_CompareMask, simdBaseJitType, simdSize); + retNode = + gtNewSimdHWIntrinsicNode(retType, retNode, NI_AVX512F_ConvertMaskToVector, simdBaseJitType, simdSize); + + break; + } + case NI_AVX512F_CompareEqual: case NI_AVX512BW_CompareEqual: { diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index 9262de93ea452e..4799b8b333491a 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -1143,27 +1143,36 @@ void CodeGen::inst_RV_RV_IV(instruction ins, emitAttr size, regNumber reg1, regN // and that returns a value in register // // Arguments: -// ins -- The instruction being emitted -// attr -- The emit attribute -// reg1 -- The first operand, a register -// rmOp -- The second operand, which may be a memory node or a node producing a register -// ival -- The immediate operand +// ins -- The instruction being emitted +// attr -- The emit attribute +// reg1 -- The first operand, a register +// rmOp -- The second operand, which may be a memory node or a node producing a register +// ival -- The immediate operand +// instOptions -- The options that modify how the instruction is generated // -void CodeGen::inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival) +void CodeGen::inst_RV_TT_IV( + instruction ins, emitAttr attr, regNumber reg1, GenTree* rmOp, int ival, insOpts instOptions) { emitter* emit = GetEmitter(); noway_assert(emit->emitVerifyEncodable(ins, EA_SIZE(attr), reg1)); +#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) + if (CodeGenInterface::IsEmbeddedBroadcastEnabled(ins, rmOp)) + { + instOptions = AddEmbBroadcastMode(instOptions); + } +#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS + OperandDesc rmOpDesc = genOperandDesc(rmOp); switch (rmOpDesc.GetKind()) { case OperandKind::ClsVar: - emit->emitIns_R_C_I(ins, attr, reg1, rmOpDesc.GetFieldHnd(), 0, ival); + emit->emitIns_R_C_I(ins, attr, reg1, rmOpDesc.GetFieldHnd(), 0, ival, instOptions); break; case OperandKind::Local: - emit->emitIns_R_S_I(ins, attr, reg1, rmOpDesc.GetVarNum(), rmOpDesc.GetLclOffset(), ival); + emit->emitIns_R_S_I(ins, attr, reg1, rmOpDesc.GetVarNum(), rmOpDesc.GetLclOffset(), ival, instOptions); break; case OperandKind::Indir: @@ -1172,12 +1181,12 @@ void CodeGen::inst_RV_TT_IV(instruction ins, emitAttr attr, regNumber reg1, GenT // temporary GT_IND to generate code with. GenTreeIndir indirForm; GenTreeIndir* indir = rmOpDesc.GetIndirForm(&indirForm); - emit->emitIns_R_A_I(ins, attr, reg1, indir, ival); + emit->emitIns_R_A_I(ins, attr, reg1, indir, ival, instOptions); } break; case OperandKind::Reg: - emit->emitIns_SIMD_R_R_I(ins, attr, reg1, rmOpDesc.GetReg(), ival); + emit->emitIns_SIMD_R_R_I(ins, attr, reg1, rmOpDesc.GetReg(), ival, instOptions); break; default: @@ -1224,7 +1233,7 @@ bool CodeGenInterface::IsEmbeddedBroadcastEnabled(instruction ins, GenTree* op) // Return Value: // The modified insOpts // -static insOpts AddEmbBroadcastMode(insOpts instOptions) +insOpts CodeGen::AddEmbBroadcastMode(insOpts instOptions) { assert((instOptions & INS_OPTS_EVEX_b_MASK) == 0); unsigned result = static_cast(instOptions); @@ -1289,7 +1298,7 @@ void CodeGen::inst_RV_RV_TT(instruction ins, } } } -#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS +#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS OperandDesc op2Desc = genOperandDesc(op2); @@ -1347,16 +1356,23 @@ void CodeGen::inst_RV_RV_TT(instruction ins, // and an immediate value. The result is returned in register // // Arguments: -// ins -- The instruction being emitted -// size -- The emit size attribute -// targetReg -- The target register -// op1Reg -- The first operand register -// op2 -- The second operand, which may be a memory node or a node producing a register -// ival -- The immediate operand -// isRMW -- true if the instruction is RMW; otherwise, false +// ins -- The instruction being emitted +// size -- The emit size attribute +// targetReg -- The target register +// op1Reg -- The first operand register +// op2 -- The second operand, which may be a memory node or a node producing a register +// ival -- The immediate operand +// isRMW -- true if the instruction is RMW; otherwise, false +// instOptions -- The options that modify how the instruction is generated // -void CodeGen::inst_RV_RV_TT_IV( - instruction ins, emitAttr size, regNumber targetReg, regNumber op1Reg, GenTree* op2, int8_t ival, bool isRMW) +void CodeGen::inst_RV_RV_TT_IV(instruction ins, + emitAttr size, + regNumber targetReg, + regNumber op1Reg, + GenTree* op2, + int8_t ival, + bool isRMW, + insOpts instOptions) { emitter* emit = GetEmitter(); noway_assert(emit->emitVerifyEncodable(ins, EA_SIZE(size), op1Reg)); @@ -1364,15 +1380,24 @@ void CodeGen::inst_RV_RV_TT_IV( // TODO-XArch-CQ: Commutative operations can have op1 be contained // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained +#if defined(TARGET_XARCH) && defined(FEATURE_HW_INTRINSICS) + if (CodeGenInterface::IsEmbeddedBroadcastEnabled(ins, op2)) + { + instOptions = AddEmbBroadcastMode(instOptions); + } +#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS + OperandDesc op2Desc = genOperandDesc(op2); + switch (op2Desc.GetKind()) { case OperandKind::ClsVar: - emit->emitIns_SIMD_R_R_C_I(ins, size, targetReg, op1Reg, op2Desc.GetFieldHnd(), 0, ival); + emit->emitIns_SIMD_R_R_C_I(ins, size, targetReg, op1Reg, op2Desc.GetFieldHnd(), 0, ival, instOptions); break; case OperandKind::Local: - emit->emitIns_SIMD_R_R_S_I(ins, size, targetReg, op1Reg, op2Desc.GetVarNum(), op2Desc.GetLclOffset(), ival); + emit->emitIns_SIMD_R_R_S_I(ins, size, targetReg, op1Reg, op2Desc.GetVarNum(), op2Desc.GetLclOffset(), ival, + instOptions); break; case OperandKind::Indir: @@ -1381,7 +1406,7 @@ void CodeGen::inst_RV_RV_TT_IV( // temporary GT_IND to generate code with. GenTreeIndir indirForm; GenTreeIndir* indir = op2Desc.GetIndirForm(&indirForm); - emit->emitIns_SIMD_R_R_A_I(ins, size, targetReg, op1Reg, indir, ival); + emit->emitIns_SIMD_R_R_A_I(ins, size, targetReg, op1Reg, indir, ival, instOptions); } break; @@ -1402,7 +1427,7 @@ void CodeGen::inst_RV_RV_TT_IV( op1Reg = targetReg; } - emit->emitIns_SIMD_R_R_R_I(ins, size, targetReg, op1Reg, op2Reg, ival); + emit->emitIns_SIMD_R_R_R_I(ins, size, targetReg, op1Reg, op2Reg, ival, instOptions); } break; diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index a94998f596e93a..421dee30ac7547 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -234,7 +234,7 @@ enum insOpts: unsigned INS_OPTS_EVEX_er_rz = 3, // Round towards zero - // Two-bits: 0b0001_1100 + // Three-bits: 0b0001_1100 INS_OPTS_EVEX_aaa_MASK = 0x1C, // mask for EVEX.aaa related features INS_OPTS_EVEX_em_k1 = 1 << 2, // Embedded mask uses K1 @@ -254,7 +254,7 @@ enum insOpts: unsigned // One-bit: 0b0010_0000 INS_OPTS_EVEX_z_MASK = 0x20, // mask for EVEX.z related features - INS_OPTS_EVEX_em_zero, // Embedded mask merges with zero + INS_OPTS_EVEX_em_zero = 1 << 5, // Embedded mask merges with zero }; #elif defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 030bcffd41c6e1..fb820b2c6cf900 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -229,15 +229,15 @@ INST3(rcpps, "rcpps", IUM_WR, BAD_CODE, BAD_CODE, INST3(rcpss, "rcpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x53), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal of scalar single INST3(rsqrtps, "rsqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x52), INS_TT_NONE, REX_WIG | Encoding_VEX) // Reciprocal Sqrt of packed singles INST3(rsqrtss, "rsqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x52), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Reciprocal Sqrt of scalar single -INST3(shufps, "shufps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC6), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(shufps, "shufps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC6), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) INST3(sfence, "sfence", IUM_RD, 0x000FF8AE, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG) -INST3(sqrtps, "sqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x51), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Sqrt of packed singles +INST3(sqrtps, "sqrtps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x51), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Sqrt of packed singles INST3(sqrtss, "sqrtss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x51), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar single INST3(subps, "subps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5C), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Subtract packed singles INST3(subss, "subss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5C), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar singles INST3(ucomiss, "ucomiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // unordered compare singles -INST3(unpckhps, "unpckhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x15), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(unpcklps, "unpcklps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x14), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) +INST3(unpckhps, "unpckhps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x15), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) +INST3(unpcklps, "unpcklps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x14), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) INST3(xorps, "xorps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x57), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // XOR packed singles // SSE2 @@ -248,19 +248,19 @@ INST3(andpd, "andpd", IUM_WR, BAD_CODE, BAD_CODE, INST3(cmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed doubles INST3(cmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles INST3(comisd, "comisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // ordered compare doubles -INST3(cvtdq2pd, "cvtdq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_TT_HALF, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed DWORDs to doubles -INST3(cvtdq2ps, "cvtdq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed DWORDs to singles -INST3(cvtpd2dq, "cvtpd2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xE6), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed doubles to DWORDs -INST3(cvtpd2ps, "cvtpd2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5A), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed doubles to singles -INST3(cvtps2dq, "cvtps2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed singles to DWORDs -INST3(cvtps2pd, "cvtps2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5A), INS_TT_HALF, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt packed singles to doubles +INST3(cvtdq2pd, "cvtdq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_TT_HALF, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed DWORDs to doubles +INST3(cvtdq2ps, "cvtdq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed DWORDs to singles +INST3(cvtpd2dq, "cvtpd2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xE6), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed doubles to DWORDs +INST3(cvtpd2ps, "cvtpd2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5A), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed doubles to singles +INST3(cvtps2dq, "cvtps2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed singles to DWORDs +INST3(cvtps2pd, "cvtps2pd", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5A), INS_TT_HALF, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed singles to doubles INST3(cvtsd2si, "cvtsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2D), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX) // cvt scalar double to DWORD INST3(cvtsd2ss, "cvtsd2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5A), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar double to scalar singles INST3(cvtsi2sd32, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt DWORD to scalar double INST3(cvtsi2sd64, "cvtsi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2A), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt QWORD to scalar double INST3(cvtss2sd, "cvtss2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar single to scalar doubles -INST3(cvttpd2dq, "cvttpd2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE6), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // cvt with trunc packed doubles to DWORDs -INST3(cvttps2dq, "cvttps2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // cvt with trunc packed singles to DWORDs +INST3(cvttpd2dq, "cvttpd2dq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE6), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with trunc packed doubles to DWORDs +INST3(cvttps2dq, "cvttps2dq", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x5B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt with trunc packed singles to DWORDs INST3(cvttsd2si32, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed DWORDs INST3(cvttsd2si64, "cvttsd2si", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x2C), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // cvt with trunc scalar double to signed DWORDs INST3(divpd, "divpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5E), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Divide packed doubles @@ -288,7 +288,7 @@ INST3(movupd, "movupd", IUM_WR, PCKDBL(0x11), BAD_CODE, INST3(mulpd, "mulpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x59), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Multiply packed doubles INST3(mulsd, "mulsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x59), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply scalar doubles INST3(orpd, "orpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x56), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Or packed doubles -INST3(packssdw, "packssdw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to short with saturation +INST3(packssdw, "packssdw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Pack (narrow) int to short with saturation INST3(packsswb, "packsswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x63), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to byte with saturation INST3(packuswb, "packuswb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x67), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) short to unsigned byte with saturation INST3(paddb, "paddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFC), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Add packed byte integers @@ -320,21 +320,21 @@ INST3(pmovmskb, "pmovmskb", IUM_WR, BAD_CODE, BAD_CODE, INST3(pmulhuw, "pmulhuw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE4), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit unsigned integers INST3(pmulhw, "pmulhw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xE5), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Multiply high the packed 16-bit signed integers INST3(pmullw, "pmullw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD5), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed multiply 16 bit unsigned integers and store lower 16 bits of each result -INST3(pmuludq, "pmuludq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), INS_TT_FULL_MEM, Input_32Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed multiply 32-bit unsigned integers and store 64-bit result +INST3(pmuludq, "pmuludq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF4), INS_TT_FULL, Input_32Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed multiply 32-bit unsigned integers and store 64-bit result INST3(por, "por", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise OR of two xmm regs INST3(psadbw, "psadbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF6), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute the sum of absolute differences of packed unsigned 8-bit integers -INST3(pshufd, "pshufd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x70), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed shuffle of 32-bit integers +INST3(pshufd, "pshufd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x70), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Packed shuffle of 32-bit integers INST3(pshufhw, "pshufhw", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x70), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Shuffle the high words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. INST3(pshuflw, "pshuflw", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x70), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Shuffle the low words in xmm2/m128 based on the encoding in imm8 and store the result in xmm1. -INST3(pslld, "pslld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), INS_TT_FULL | INS_TT_MEM128, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 32-bit integers +INST3(pslld, "pslld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xF2), INS_TT_FULL | INS_TT_MEM128, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed shift left logical of 32-bit integers INST3(pslldq, "pslldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift left logical of xmm reg by given number of bytes -INST3(psllq, "psllq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 64-bit integers +INST3(psllq, "psllq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xF3), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed shift left logical of 64-bit integers INST3(psllw, "psllw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xF1), INS_TT_FULL_MEM | INS_TT_MEM128, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift left logical of 16-bit integers -INST3(psrad, "psrad", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_TT_FULL | INS_TT_MEM128, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 32-bit integers +INST3(psrad, "psrad", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_TT_FULL | INS_TT_MEM128, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed shift right arithmetic of 32-bit integers INST3(psraw, "psraw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xE1), INS_TT_FULL_MEM | INS_TT_MEM128, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 16-bit integers -INST3(psrld, "psrld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2), INS_TT_FULL | INS_TT_MEM128, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 32-bit integers +INST3(psrld, "psrld", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xD2), INS_TT_FULL | INS_TT_MEM128, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed shift right logical of 32-bit integers INST3(psrldq, "psrldq", IUM_WR, BAD_CODE, PCKDBL(0x73), BAD_CODE, INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shift right logical of xmm reg by given number of bytes -INST3(psrlq, "psrlq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 64-bit integers +INST3(psrlq, "psrlq", IUM_WR, BAD_CODE, PCKDBL(0x73), PCKDBL(0xD3), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed shift right logical of 64-bit integers INST3(psrlw, "psrlw", IUM_WR, BAD_CODE, PCKDBL(0x71), PCKDBL(0xD1), INS_TT_FULL_MEM | INS_TT_MEM128, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right logical of 16-bit integers INST3(psubb, "psubb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xF8), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed word (16-bit) integers INST3(psubd, "psubd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xFA), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Subtract packed double-word (32-bit) integers @@ -345,22 +345,22 @@ INST3(psubsw, "psubsw", IUM_WR, BAD_CODE, BAD_CODE, INST3(psubusb, "psubusb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD8), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 8-bit integers in b from packed unsigned 8-bit integers in a using saturation INST3(psubusw, "psubusw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD9), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract packed unsigned 16-bit integers in b from packed unsigned 16-bit integers in a using saturation INST3(punpckhbw, "punpckhbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x68), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) -INST3(punpckhdq, "punpckhdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(punpckhqdq, "punpckhqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6D), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (hi) +INST3(punpckhdq, "punpckhdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6A), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) +INST3(punpckhqdq, "punpckhqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6D), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed logical (unsigned) widen uint to ulong (hi) INST3(punpckhwd, "punpckhwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x69), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (hi) INST3(punpcklbw, "punpcklbw", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x60), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (lo) -INST3(punpckldq, "punpckldq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(punpcklqdq, "punpcklqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6C), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen uint to ulong (lo) +INST3(punpckldq, "punpckldq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) +INST3(punpcklqdq, "punpcklqdq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6C), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed logical (unsigned) widen uint to ulong (lo) INST3(punpcklwd, "punpcklwd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x61), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ushort to uint (lo) INST3(pxor, "pxor", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise XOR of two xmm regs -INST3(shufpd, "shufpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC6), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) -INST3(sqrtpd, "sqrtpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x51), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Sqrt of packed doubles +INST3(shufpd, "shufpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC6), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) +INST3(sqrtpd, "sqrtpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x51), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Sqrt of packed doubles INST3(sqrtsd, "sqrtsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x51), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Sqrt of scalar double INST3(subpd, "subpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x5C), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Subtract packed doubles INST3(subsd, "subsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x5C), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Subtract scalar doubles INST3(ucomisd, "ucomisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Writes_PF | Writes_CF) // unordered compare doubles -INST3(unpckhpd, "unpckhpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x15), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) -INST3(unpcklpd, "unpcklpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x14), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed logical (unsigned) widen ubyte to ushort (hi) +INST3(unpckhpd, "unpckhpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x15), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed logical (unsigned) widen ubyte to ushort (hi) +INST3(unpcklpd, "unpcklpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x14), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed logical (unsigned) widen ubyte to ushort (hi) INST3(xorpd, "xorpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x57), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // XOR packed doubles // SSE3 @@ -377,7 +377,7 @@ INST3(movsldup, "movsldup", IUM_WR, BAD_CODE, BAD_CODE, // SSSE3 INST3(pabsb, "pabsb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1C), INS_TT_FULL_MEM, Input_8Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of bytes -INST3(pabsd, "pabsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 32-bit integers +INST3(pabsd, "pabsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1E), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Packed absolute value of 32-bit integers INST3(pabsw, "pabsw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1D), INS_TT_FULL_MEM, Input_16Bit | REX_WIG | Encoding_VEX | Encoding_EVEX) // Packed absolute value of 16-bit integers INST3(palignr, "palignr", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0F), INS_TT_FULL_MEM, REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed Align Right INST3(phaddd, "phaddd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x02), INS_TT_NONE, Input_32Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed horizontal add @@ -413,7 +413,7 @@ INST3(extractps, "extractps", IUM_WR, SSE3A(0x17), BAD_CODE, INST3(insertps, "insertps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x21), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert packed single precision float value INST3(movntdqa, "movntdqa", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2A), INS_TT_FULL_MEM, REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Load Double Quadword Non-Temporal Aligned Hint INST3(mpsadbw, "mpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), INS_TT_NONE, Input_8Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Compute Multiple Packed Sums of Absolute Difference -INST3(packusdw, "packusdw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Pack (narrow) int to unsigned short with saturation +INST3(packusdw, "packusdw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2B), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Pack (narrow) int to unsigned short with saturation INST3(pblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x10), INS_TT_NONE, Input_8Bit | REX_W0) // Variable Blend Packed Bytes INST3(pblendw, "pblendw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0E), INS_TT_NONE, Input_16Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Words INST3(pcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality @@ -448,8 +448,8 @@ INST3(pmovzxwq, "pmovzxwq", IUM_WR, BAD_CODE, BAD_CODE, INST3(pmuldq, "pmuldq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x28), INS_TT_FULL, Input_32Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed multiply 32-bit signed integers and store 64-bit result INST3(pmulld, "pmulld", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x40), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result INST3(ptest, "ptest", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x17), INS_TT_NONE, REX_WIG | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed logical compare -INST3(roundpd, "roundpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX) // Round packed double precision floating-point values -INST3(roundps, "roundps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX) // Round packed single precision floating-point values +INST3(roundpd, "roundpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_TT_FULL, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Round packed double precision floating-point values +INST3(roundps, "roundps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_TT_FULL, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Round packed single precision floating-point values INST3(roundsd, "roundsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double precision floating-point values INST3(roundss, "roundss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0_EVEX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single precision floating-point values @@ -472,10 +472,10 @@ INST3(vmaskmovpd, "maskmovpd", IUM_WR, SSE38(0x2F), BAD_CODE, INST3(vmaskmovps, "maskmovps", IUM_WR, SSE38(0x2E), BAD_CODE, SSE38(0x2C), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores INST3(vpblendvb, "pblendvb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x4C), INS_TT_NONE, Input_8Bit | REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Variable Blend Packed Bytes INST3(vperm2f128, "perm2f128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x06), INS_TT_NONE, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Floating-Point Values -INST3(vpermilpd, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05), INS_TT_FULL, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values -INST3(vpermilpdvar, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_TT_FULL, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values -INST3(vpermilps, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x04), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values -INST3(vpermilpsvar, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values +INST3(vpermilpd, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x05), INS_TT_FULL, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values +INST3(vpermilpdvar, "permilpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0D), INS_TT_FULL, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Double-Precision Floating-Point Values +INST3(vpermilps, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x04), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values +INST3(vpermilpsvar, "permilps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x0C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values INST3(vtestpd, "testpd", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x0F), INS_TT_NONE, Input_64Bit | REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed Bit Test INST3(vtestps, "testps", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x0E), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF) // Packed Bit Test INST3(vzeroupper, "zeroupper", IUM_WR, 0xC577F8, BAD_CODE, BAD_CODE, INS_TT_NONE, REX_WIG | Encoding_VEX) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix) @@ -494,10 +494,10 @@ INST3(vpbroadcastd, "pbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, INST3(vpbroadcastq, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_WX | Encoding_VEX | Encoding_EVEX) // Broadcast int64 value from reg/memory to entire ymm register INST3(vpbroadcastw, "pbroadcastw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x79), INS_TT_TUPLE1_SCALAR, Input_16Bit | REX_W0 | Encoding_VEX | Encoding_EVEX) // Broadcast int16 value from reg/memory to entire ymm register INST3(vperm2i128, "perm2i128", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x46), INS_TT_NONE, REX_W0 | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute 128-bit halves of input register -INST3(vpermd, "permd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_TT_FULL, Input_64Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Packed Doublewords Elements -INST3(vpermpd, "permpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x01), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // Permute Double-Precision Floating-Point Values -INST3(vpermps, "permps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Permute Single-Precision Floating-Point Elements -INST3(vpermq, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x00), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX) // Permute 64-bit of input register +INST3(vpermd, "permd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_TT_FULL, Input_64Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute Packed Doublewords Elements +INST3(vpermpd, "permpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x01), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Permute Double-Precision Floating-Point Values +INST3(vpermps, "permps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute Single-Precision Floating-Point Elements +INST3(vpermq, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x00), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Permute 64-bit of input register INST3(vpgatherdd, "pgatherdd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Dword INST3(vpgatherdq, "pgatherdq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x90), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword with Signed Dword Indices INST3(vpgatherqd, "pgatherqd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x91), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Gather Packed Dword Values Using Signed Qword @@ -512,60 +512,60 @@ INST3(vpsrlvq, "psrlvq", IUM_WR, BAD_CODE, BAD_CODE, INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) // id nm um mr mi rm flags -INST3(vfmadd132pd, "fmadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x98), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values -INST3(vfmadd213pd, "fmadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd231pd, "fmadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd132ps, "fmadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x98), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values -INST3(vfmadd213ps, "fmadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmadd231ps, "fmadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmadd132pd, "fmadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x98), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Add of Packed Double-Precision Floating-Point Values +INST3(vfmadd213pd, "fmadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmadd231pd, "fmadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmadd132ps, "fmadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x98), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Add of Packed Single-Precision Floating-Point Values +INST3(vfmadd213ps, "fmadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA8), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmadd231ps, "fmadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB8), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // INST3(vfmadd132sd, "fmadd132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x99), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Double-Precision Floating-Point Values INST3(vfmadd213sd, "fmadd213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // INST3(vfmadd231sd, "fmadd231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // INST3(vfmadd132ss, "fmadd132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x99), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Add of Scalar Single-Precision Floating-Point Values INST3(vfmadd213ss, "fmadd213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA9), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // INST3(vfmadd231ss, "fmadd231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB9), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmaddsub132pd, "fmaddsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x96), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values -INST3(vfmaddsub213pd, "fmaddsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmaddsub231pd, "fmaddsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmaddsub132ps, "fmaddsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x96), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values -INST3(vfmaddsub213ps, "fmaddsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmaddsub231ps, "fmaddsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsubadd132pd, "fmsubadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x97), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values -INST3(vfmsubadd213pd, "fmsubadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsubadd231pd, "fmsubadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsubadd132ps, "fmsubadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x97), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values -INST3(vfmsubadd213ps, "fmsubadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsubadd231ps, "fmsubadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub132pd, "fmsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values -INST3(vfmsub213pd, "fmsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub231pd, "fmsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub132ps, "fmsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values -INST3(vfmsub213ps, "fmsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfmsub231ps, "fmsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfmaddsub132pd, "fmaddsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x96), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Alternating Add/Subtract of Packed Double-Precision Floating-Point Values +INST3(vfmaddsub213pd, "fmaddsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmaddsub231pd, "fmaddsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmaddsub132ps, "fmaddsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x96), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Alternating Add/Subtract of Packed Single-Precision Floating-Point Values +INST3(vfmaddsub213ps, "fmaddsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA6), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmaddsub231ps, "fmaddsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB6), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmsubadd132pd, "fmsubadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x97), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Alternating Subtract/Add of Packed Double-Precision Floating-Point Values +INST3(vfmsubadd213pd, "fmsubadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmsubadd231pd, "fmsubadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmsubadd132ps, "fmsubadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x97), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Alternating Subtract/Add of Packed Single-Precision Floating-Point Values +INST3(vfmsubadd213ps, "fmsubadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xA7), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmsubadd231ps, "fmsubadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xB7), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmsub132pd, "fmsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Subtract of Packed Double-Precision Floating-Point Values +INST3(vfmsub213pd, "fmsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmsub231pd, "fmsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmsub132ps, "fmsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9A), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Multiply-Subtract of Packed Single-Precision Floating-Point Values +INST3(vfmsub213ps, "fmsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAA), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfmsub231ps, "fmsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBA), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // INST3(vfmsub132sd, "fmsub132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Double-Precision Floating-Point Values INST3(vfmsub213sd, "fmsub213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // INST3(vfmsub231sd, "fmsub231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // INST3(vfmsub132ss, "fmsub132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Multiply-Subtract of Scalar Single-Precision Floating-Point Values INST3(vfmsub213ss, "fmsub213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAB), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // INST3(vfmsub231ss, "fmsub231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBB), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd132pd, "fnmadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values -INST3(vfnmadd213pd, "fnmadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd231pd, "fnmadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd132ps, "fnmadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values -INST3(vfnmadd213ps, "fnmadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmadd231ps, "fnmadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmadd132pd, "fnmadd132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Negative Multiply-Add of Packed Double-Precision Floating-Point Values +INST3(vfnmadd213pd, "fnmadd213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfnmadd231pd, "fnmadd231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfnmadd132ps, "fnmadd132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Negative Multiply-Add of Packed Single-Precision Floating-Point Values +INST3(vfnmadd213ps, "fnmadd213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAC), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfnmadd231ps, "fnmadd231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBC), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // INST3(vfnmadd132sd, "fnmadd132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Double-Precision Floating-Point Values INST3(vfnmadd213sd, "fnmadd213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // INST3(vfnmadd231sd, "fnmadd231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // INST3(vfnmadd132ss, "fnmadd132ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9D), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Add of Scalar Single-Precision Floating-Point Values INST3(vfnmadd213ss, "fnmadd213ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAD), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // INST3(vfnmadd231ss, "fnmadd231ss", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBD), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub132pd, "fnmsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values -INST3(vfnmsub213pd, "fnmsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub231pd, "fnmsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub132ps, "fnmsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values -INST3(vfnmsub213ps, "fnmsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // -INST3(vfnmsub231ps, "fnmsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // +INST3(vfnmsub132pd, "fnmsub132pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Negative Multiply-Subtract of Packed Double-Precision Floating-Point Values +INST3(vfnmsub213pd, "fnmsub213pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfnmsub231pd, "fnmsub231pd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfnmsub132ps, "fnmsub132ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9E), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fused Negative Multiply-Subtract of Packed Single-Precision Floating-Point Values +INST3(vfnmsub213ps, "fnmsub213ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAE), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // +INST3(vfnmsub231ps, "fnmsub231ps", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBE), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // INST3(vfnmsub132sd, "fnmsub132sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0x9F), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fused Negative Multiply-Subtract of Scalar Double-Precision Floating-Point Values INST3(vfnmsub213sd, "fnmsub213sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xAF), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // INST3(vfnmsub231sd, "fnmsub231sd", IUM_RW, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // @@ -610,85 +610,85 @@ INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, INST3(FIRST_AVX512_INSTRUCTION, "FIRST_AVX512_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_TT_NONE, INS_FLAGS_None) // AVX512F -INST3(kandw, "kandw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x41), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical AND masks -INST3(kandnw, "kandnw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x42), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical AND NOT masks -INST3(kmovw_gpr, "kmovw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers -INST3(kmovw_msk, "kmovw", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers -INST3(knotw, "knotw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x44), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // NOT mask register -INST3(korw, "korw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x45), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical OR masks -INST3(kortestw, "kortestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags -INST3(kshiftlw, "kshiftlw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x32), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift left mask registers -INST3(kshiftrw, "kshiftrw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x30), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift right mask registers -INST3(kunpckbw, "kunpckbw", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x4B), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Unpack for mask registers -INST3(kxnorw, "kxnorw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x46), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical XNOR masks -INST3(kxorw, "kxorw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x47), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical XOR masks -INST3(valignd, "alignd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x03), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Align doubleword vectors -INST3(valignq, "alignq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x03), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Align quadword vectors -INST3(vblendmpd, "blendmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x65), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Float64 vectors using an OpMask control -INST3(vblendmps, "blendmps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x65), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Float32 vectors using an OpMask control -INST3(vpblendmq, "pblendmq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x64), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Int32 vectors using an OpMask control +INST3(kandw, "kandw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x41), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical AND masks +INST3(kandnw, "kandnw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x42), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical AND NOT masks +INST3(kmovw_gpr, "kmovw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(kmovw_msk, "kmovw", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(knotw, "knotw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x44), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // NOT mask register +INST3(korw, "korw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x45), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical OR masks +INST3(kortestw, "kortestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags +INST3(kshiftlw, "kshiftlw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x32), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift left mask registers +INST3(kshiftrw, "kshiftrw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x30), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift right mask registers +INST3(kunpckbw, "kunpckbw", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x4B), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Unpack for mask registers +INST3(kxnorw, "kxnorw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x46), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical XNOR masks +INST3(kxorw, "kxorw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x47), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical XOR masks +INST3(valignd, "alignd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x03), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Align doubleword vectors +INST3(valignq, "alignq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x03), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Align quadword vectors +INST3(vblendmpd, "blendmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x65), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Blend Float64 vectors using an OpMask control +INST3(vblendmps, "blendmps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x65), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Blend Float32 vectors using an OpMask control +INST3(vpblendmq, "pblendmq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x64), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Blend Int32 vectors using an OpMask control INST3(vpblendmb, "pblendmb", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x66), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Int64 vectors using an OpMask control INST3(vbroadcastf64x2, "broadcastf64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1A), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register INST3(vbroadcasti64x2, "broadcasti64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5A), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register INST3(vbroadcastf64x4, "broadcastf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1B), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register INST3(vbroadcasti64x4, "broadcasti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5B), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register -INST3(vcmpps, "cmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed singles -INST3(vcmpss, "cmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles -INST3(vcmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare packed doubles -INST3(vcmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles -INST3(vcvtpd2udq, "cvtpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt packed doubles to unsigned DWORDs -INST3(vcvtps2udq, "cvtps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed singles to unsigned DWORDs +INST3(vcmpps, "cmpps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0xC2), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // compare packed singles +INST3(vcmpss, "cmpss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xC2), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar singles +INST3(vcmppd, "cmppd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xC2), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // compare packed doubles +INST3(vcmpsd, "cmpsd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0xC2), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // compare scalar doubles +INST3(vcvtpd2udq, "cvtpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed doubles to unsigned DWORDs +INST3(vcvtps2udq, "cvtps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x79), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed singles to unsigned DWORDs INST3(vcvtsd2usi, "cvtsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x79), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_WX | Encoding_EVEX) // cvt scalar double to unsigned DWORD/QWORD INST3(vcvtss2usi, "cvtss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x79), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_WX | Encoding_EVEX) // cvt scalar single to unsigned DWORD/QWORD -INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned DWORDs -INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned DWORDs +INST3(vcvttpd2udq, "cvttpd2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt w/ truncation packed doubles to unsigned DWORDs +INST3(vcvttps2udq, "cvttps2udq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x78), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt w/ truncation packed singles to unsigned DWORDs INST3(vcvttsd2usi32, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned DWORD INST3(vcvttsd2usi64, "cvttsd2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x78), INS_TT_TUPLE1_FIXED, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar double to unsigned QWORD INST3(vcvttss2usi32, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD INST3(vcvttss2usi64, "cvttss2usi", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x78), INS_TT_TUPLE1_FIXED, Input_32Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation scalar single to unsigned DWORD/QWORD -INST3(vcvtudq2pd, "cvtudq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to doubles -INST3(vcvtudq2ps, "cvtudq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed unsigned DWORDs to singles +INST3(vcvtudq2pd, "cvtudq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed unsigned DWORDs to doubles +INST3(vcvtudq2ps, "cvtudq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed unsigned DWORDs to singles INST3(vcvtusi2sd32, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to double INST3(vcvtusi2sd64, "cvtusi2sd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned QWORD to double INST3(vcvtusi2ss32, "cvtusi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7B), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned DWORD to single INST3(vcvtusi2ss64, "cvtusi2ss", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // cvt scalar unsigned QWORD to single INST3(vextractf64x4, "extractf64x4", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_64Bit | REX_W1 | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values INST3(vextracti64x4, "extracti64x4", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, INS_TT_TUPLE4, Input_64Bit | REX_W1 | Encoding_EVEX) // Extract 256-bit packed quadword integer values -INST3(vfixupimmpd, "fixupimmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x54), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special packed double-precision floating-point values -INST3(vfixupimmps, "fixupimmps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x54), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special packed single-precision floating-point values +INST3(vfixupimmpd, "fixupimmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x54), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fixup special packed double-precision floating-point values +INST3(vfixupimmps, "fixupimmps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x54), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Fixup special packed single-precision floating-point values INST3(vfixupimmsd, "fixupimmsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x55), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special scalar double-precision floating-point value INST3(vfixupimmss, "fixupimmss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x55), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Fixup special scalar single-precision floating-point value -INST3(vgetexppd, "getexppd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x42), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // Extract exponents of packed double-precision floating-point values -INST3(vgetexpps, "getexpps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x42), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // Extract exponents of packed single-precision floating-point values +INST3(vgetexppd, "getexppd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x42), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Extract exponents of packed double-precision floating-point values +INST3(vgetexpps, "getexpps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x42), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Extract exponents of packed single-precision floating-point values INST3(vgetexpsd, "getexpsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x43), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract exponents of scalar double-precision floating-point value INST3(vgetexpss, "getexpss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x43), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract exponents of scalar single-precision floating-point value -INST3(vgetmantpd, "getmantpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x26), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // Extract mantissas of packed double-precision floating-point values -INST3(vgetmantps, "getmantps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x26), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // Extract mantissas of packed single-precision floating-point values +INST3(vgetmantpd, "getmantpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x26), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Extract mantissas of packed double-precision floating-point values +INST3(vgetmantps, "getmantps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x26), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Extract mantissas of packed single-precision floating-point values INST3(vgetmantsd, "getmantsd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x27), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract mantissas of scalar double-precision floating-point value INST3(vgetmantss, "getmantss", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x27), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Extract mantissas of scalar single-precision floating-point value INST3(vinsertf64x4, "insertf64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1A), INS_TT_TUPLE4, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values INST3(vinserti64x4, "inserti64x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE4, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values INST3(vmovdqa64, "movdqa64", IUM_WR, PCKDBL(0x7F), BAD_CODE, PCKDBL(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX) INST3(vmovdqu64, "movdqu64", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_64Bit | REX_W1 | Encoding_EVEX) -INST3(vpabsq, "pabsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1F), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // Packed absolute value of 64-bit integers +INST3(vpabsq, "pabsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1F), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Packed absolute value of 64-bit integers INST3(vpandq, "pandq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDB), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise AND of two xmm regs INST3(vpandnq, "pandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xDF), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise AND NOT of two xmm regs INST3(vpbroadcastd_gpr, "pbroadcastd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7C), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast int32 value from gpr to entire register INST3(vpbroadcastq_gpr, "pbroadcastq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7C), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX) // Broadcast int64 value from gpr to entire register -INST3(vpcmpeqd, "pcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit integers for equality -INST3(vpcmpgtd, "pcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 32-bit signed integers for greater than -INST3(vpcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality -INST3(vpcmpgtq, "pcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed compare 64-bit integers for equality -INST3(vpermq_reg, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute 64-bit of input register -INST3(vpermpd_reg, "permpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Permute 64-bit of input register -INST3(vpermi2d, "permi2d", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x76), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index -INST3(vpermi2pd, "permi2pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x77), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index -INST3(vpermi2ps, "permi2ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x77), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index -INST3(vpermi2q, "permi2q", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x76), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting the Index -INST3(vpermt2d, "permt2d", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7E), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table -INST3(vpermt2pd, "permt2pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7F), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table -INST3(vpermt2ps, "permt2ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7F), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table -INST3(vpermt2q, "permt2q", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7E), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Full Permute From Two Tables Overwriting one Table +INST3(vpcmpeqd, "pcmpeqd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x76), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed compare 32-bit integers for equality +INST3(vpcmpgtd, "pcmpgtd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x66), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed compare 32-bit signed integers for greater than +INST3(vpcmpeqq, "pcmpeqq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x29), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed compare 64-bit integers for equality +INST3(vpcmpgtq, "pcmpgtq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x37), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed compare 64-bit integers for equality +INST3(vpermq_reg, "permq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x36), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute 64-bit of input register +INST3(vpermpd_reg, "permpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x16), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Permute 64-bit of input register +INST3(vpermi2d, "permi2d", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x76), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting the Index +INST3(vpermi2pd, "permi2pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x77), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting the Index +INST3(vpermi2ps, "permi2ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x77), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting the Index +INST3(vpermi2q, "permi2q", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x76), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting the Index +INST3(vpermt2d, "permt2d", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7E), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting one Table +INST3(vpermt2pd, "permt2pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7F), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting one Table +INST3(vpermt2ps, "permt2ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7F), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting one Table +INST3(vpermt2q, "permt2q", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x7E), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Full Permute From Two Tables Overwriting one Table INST3(vpmaxsq, "pmaxsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3D), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed maximum 64-bit signed integers INST3(vpmaxuq, "pmaxuq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x3F), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed maximum 64-bit unsigned integers INST3(vpminsq, "pminsq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x39), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // packed minimum 64-bit signed integers @@ -709,82 +709,82 @@ INST3(vpmovusqb, "pmovusqb", IUM_WR, PSSE38(0xF3, 0x12), BAD_ INST3(vpmovusqd, "pmovusqd", IUM_WR, PSSE38(0xF3, 0x15), BAD_CODE, PSSE38(0xF3, 0x15), INS_TT_HALF_MEM, Input_64Bit | REX_W0 | Encoding_EVEX) INST3(vpmovusqw, "pmovusqw", IUM_WR, PSSE38(0xF3, 0x14), BAD_CODE, PSSE38(0xF3, 0x14), INS_TT_QUARTER_MEM, Input_64Bit | REX_W0 | Encoding_EVEX) INST3(vporq, "porq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEB), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise OR of two xmm regs -INST3(vprold, "prold", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left -INST3(vprolq, "prolq", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left -INST3(vprolvd, "prolvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left -INST3(vprolvq, "prolvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate left -INST3(vprord, "prord", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right -INST3(vprorq, "prorq", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right -INST3(vprorvd, "prorvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right -INST3(vprorvq, "prorvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bit rotate right -INST3(vpsraq, "psraq", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Packed shift right arithmetic of 64-bit integers +INST3(vprold, "prold", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate left +INST3(vprolq, "prolq", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate left +INST3(vprolvd, "prolvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate left +INST3(vprolvq, "prolvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x15), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate left +INST3(vprord, "prord", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate right +INST3(vprorq, "prorq", IUM_WR, BAD_CODE, PCKDBL(0x72), BAD_CODE, INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate right +INST3(vprorvd, "prorvd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate right +INST3(vprorvq, "prorvq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x14), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bit rotate right +INST3(vpsraq, "psraq", IUM_WR, BAD_CODE, PCKDBL(0x72), PCKDBL(0xE2), INS_TT_FULL | INS_TT_MEM128, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed shift right arithmetic of 64-bit integers INST3(vpsravq, "psravq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x46), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Variable Bit Shift Right Arithmetic -INST3(vpternlogd, "pternlogd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bitwise Ternary Logic -INST3(vpternlogq, "pternlogq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Bitwise Ternary Logic -INST3(vptestmd, "ptestmd", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x27), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask -INST3(vptestmq, "ptestmq", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x27), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND and set mask -INST3(vptestnmd, "ptestnmd", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x27), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask -INST3(vptestnmq, "ptestnmq", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x27), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask +INST3(vpternlogd, "pternlogd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bitwise Ternary Logic +INST3(vpternlogq, "pternlogq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x25), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Bitwise Ternary Logic +INST3(vptestmd, "ptestmd", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x27), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Logical AND and set mask +INST3(vptestmq, "ptestmq", IUM_RD, BAD_CODE, BAD_CODE, SSE38(0x27), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Logical AND and set mask +INST3(vptestnmd, "ptestnmd", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x27), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Logical NAND and set mask +INST3(vptestnmq, "ptestnmq", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x27), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Logical NAND and set mask INST3(vpxorq, "pxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xEF), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Packed bit-wise XOR of two xmm regs -INST3(vrangepd, "rangepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x50), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of packed double-precision floating-point values -INST3(vrangeps, "rangeps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of packed single-precision floating-point values +INST3(vrangepd, "rangepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x50), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Range restriction calculation from a pair of packed double-precision floating-point values +INST3(vrangeps, "rangeps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x50), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Range restriction calculation from a pair of packed single-precision floating-point values INST3(vrangesd, "rangesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x51), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of scalar double-precision floating-point value INST3(vrangess, "rangess", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x51), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Range restriction calculation from a pair of scalar single-precision floating-point value -INST3(vrcp14pd, "rcp14pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // Compute approximate reciprocals of packed double-precision floating-point values -INST3(vrcp14ps, "rcp14ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // Compute approximate reciprocals of packed single-precision floating-point values +INST3(vrcp14pd, "rcp14pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Compute approximate reciprocals of packed double-precision floating-point values +INST3(vrcp14ps, "rcp14ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Compute approximate reciprocals of packed single-precision floating-point values INST3(vrcp14sd, "rcp14sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of scalar double-precision floating-point value INST3(vrcp14ss, "rcp14ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4D), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of scalar single-precision floating-point value -INST3(vreducepd, "reducepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x56), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // Perform a reduction transformation on packed double-precision floating-point values -INST3(vreduceps, "reduceps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x56), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // Perform a reduction transformation on packed single-precision floating-point values +INST3(vreducepd, "reducepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x56), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Perform a reduction transformation on packed double-precision floating-point values +INST3(vreduceps, "reduceps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x56), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Perform a reduction transformation on packed single-precision floating-point values INST3(vreducesd, "reducesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x57), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Perform a reduction transformation on scalar double-precision floating-point value INST3(vreducess, "reducess", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x57), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Perform a reduction transformation on scalar single-precision floating-point value -INST3(vrndscalepd, "rndscalepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // Round packed double-precision floating-point values to include a given number of fraction bits -INST3(vrndscaleps, "rndscaleps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // Round packed single-precision floating-point values to include a given number of fraction bits +INST3(vrndscalepd, "rndscalepd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x09), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Round packed double-precision floating-point values to include a given number of fraction bits +INST3(vrndscaleps, "rndscaleps", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x08), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Round packed single-precision floating-point values to include a given number of fraction bits INST3(vrndscalesd, "rndscalesd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0B), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar double-precision floating-point value to include a given number of fraction bits INST3(vrndscaless, "rndscaless", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0A), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Round scalar single-precision floating-point value to include a given number of fraction bits -INST3(vrsqrt14pd, "rsqrt14pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4E), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // Compute approximate reciprocals of square roots of packed double-precision floating-point values -INST3(vrsqrt14ps, "rsqrt14ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4E), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // Compute approximate reciprocals of square roots of packed single-precision floating-point values +INST3(vrsqrt14pd, "rsqrt14pd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4E), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Compute approximate reciprocals of square roots of packed double-precision floating-point values +INST3(vrsqrt14ps, "rsqrt14ps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4E), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Compute approximate reciprocals of square roots of packed single-precision floating-point values INST3(vrsqrt14sd, "rsqrt14sd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4F), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of square roots of scalar double-precision floating-point value INST3(vrsqrt14ss, "rsqrt14ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x4F), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstSrcSrcAVXInstruction) // Compute approximate reciprocals of square roots of scalar single-precision floating-point value -INST3(vscalefpd, "scalefpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale packed double-precision floating-point values -INST3(vscalefps, "scalefps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale packed single-precision floating-point values +INST3(vscalefpd, "scalefpd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2C), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Scale packed double-precision floating-point values +INST3(vscalefps, "scalefps", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2C), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Scale packed single-precision floating-point values INST3(vscalefsd, "scalefsd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2D), INS_TT_TUPLE1_SCALAR, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale scalar double-precision floating-point value INST3(vscalefss, "scalefss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x2D), INS_TT_TUPLE1_SCALAR, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Scale scalar single-precision floating-point value -INST3(vshuff32x4, "shuff32x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x23), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity -INST3(vshuff64x2, "shuff64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x23), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity -INST3(vshufi32x4, "shufi32x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x43), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity -INST3(vshufi64x2, "shufi64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x43), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Shuffle packed values at 128-bit granularity +INST3(vshuff32x4, "shuff32x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x23), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Shuffle packed values at 128-bit granularity +INST3(vshuff64x2, "shuff64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x23), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Shuffle packed values at 128-bit granularity +INST3(vshufi32x4, "shufi32x4", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x43), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Shuffle packed values at 128-bit granularity +INST3(vshufi64x2, "shufi64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x43), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Shuffle packed values at 128-bit granularity // AVX512BW -INST3(kaddd, "kaddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x4A), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Add two masks -INST3(kaddq, "kaddq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4A), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Add two masks -INST3(kandd, "kandd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x41), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical AND masks -INST3(kandq, "kandq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x41), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical AND masks -INST3(kandnd, "kandnd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x42), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical AND NOT masks -INST3(kandnq, "kandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x42), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical AND NOT masks -INST3(kmovd_gpr, "kmovd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers -INST3(kmovd_msk, "kmovd", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers -INST3(kmovq_gpr, "kmovq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers -INST3(kmovq_msk, "kmovq", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers -INST3(knotd, "knotd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x44), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // NOT mask register -INST3(knotq, "knotq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x44), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // NOT mask register -INST3(kord, "kord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x45), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical OR masks -INST3(korq, "korq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x45), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical OR masks -INST3(kortestd, "kortestd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags -INST3(kortestq, "kortestq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags -INST3(kshiftld, "kshiftld", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x33), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift left mask registers -INST3(kshiftlq, "kshiftlq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x33), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift left mask registers -INST3(kshiftrd, "kshiftrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x31), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift right mask registers -INST3(kshiftrq, "kshiftrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x31), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift right mask registers -INST3(ktestd, "ktestd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x99), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags -INST3(ktestq, "ktestq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x99), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags -INST3(kunpckdq, "kunpckdq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x4B), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Unpack for mask registers -INST3(kunpckwd, "kunpckwd", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x4B), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Unpack for mask registers -INST3(kxnord, "kxnord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x46), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical XNOR masks -INST3(kxnorq, "kxnorq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x46), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical XNOR masks -INST3(kxord, "kxord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x47), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical XOR masks -INST3(kxorq, "kxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x47), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical XOR masks -INST3(vpblendmd, "pblendmd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x64), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Byte vectors using an OpMask control +INST3(kaddd, "kaddd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x4A), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Add two masks +INST3(kaddq, "kaddq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4A), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Add two masks +INST3(kandd, "kandd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x41), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical AND masks +INST3(kandq, "kandq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x41), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical AND masks +INST3(kandnd, "kandnd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x42), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical AND NOT masks +INST3(kandnq, "kandnq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x42), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical AND NOT masks +INST3(kmovd_gpr, "kmovd", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(kmovd_msk, "kmovd", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(kmovq_gpr, "kmovq", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x92), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(kmovq_msk, "kmovq", IUM_WR, PCKFLT(0x91), BAD_CODE, PCKFLT(0x90), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(knotd, "knotd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x44), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // NOT mask register +INST3(knotq, "knotq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x44), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // NOT mask register +INST3(kord, "kord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x45), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical OR masks +INST3(korq, "korq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x45), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical OR masks +INST3(kortestd, "kortestd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags +INST3(kortestq, "kortestq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x98), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags +INST3(kshiftld, "kshiftld", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x33), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift left mask registers +INST3(kshiftlq, "kshiftlq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x33), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift left mask registers +INST3(kshiftrd, "kshiftrd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x31), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift right mask registers +INST3(kshiftrq, "kshiftrq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x31), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Shift right mask registers +INST3(ktestd, "ktestd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x99), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags +INST3(ktestq, "ktestq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x99), INS_TT_NONE, REX_W1 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags +INST3(kunpckdq, "kunpckdq", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x4B), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Unpack for mask registers +INST3(kunpckwd, "kunpckwd", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x4B), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Unpack for mask registers +INST3(kxnord, "kxnord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x46), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical XNOR masks +INST3(kxnorq, "kxnorq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x46), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical XNOR masks +INST3(kxord, "kxord", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x47), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical XOR masks +INST3(kxorq, "kxorq", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x47), INS_TT_NONE, REX_W1 | Encoding_VEX | KInstruction) // Bitwise logical XOR masks +INST3(vpblendmd, "pblendmd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x64), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction | INS_Flags_EmbeddedBroadcastSupported) // Blend Byte vectors using an OpMask control INST3(vpblendmw, "pblendmw", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x66), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Word vectors using an OpMask control INST3(vdbpsadbw, "dbpsadbw", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x42), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Double block packed Sum-Absolute-Differences (SAD) on unsigned bytes INST3(vmovdqu8, "movdqu8", IUM_WR, SSEFLT(0x7F), BAD_CODE, SSEFLT(0x6F), INS_TT_FULL_MEM, Input_8Bit | REX_W0 | Encoding_EVEX) @@ -818,43 +818,43 @@ INST3(vptestnmb, "ptestnmb", IUM_RD, BAD_CODE, BAD_ INST3(vptestnmw, "ptestnmw", IUM_RD, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x26), INS_TT_FULL_MEM, Input_16Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Logical NAND and set mask // AVX512CD -INST3(vpconflictd, "pconflictd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xC4), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // Detect conflicts within a vector of packed dword values into dense memory/register -INST3(vpconflictq, "pconflictq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xC4), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // Detect conflicts within a vector of packed qword values into dense memory/register -INST3(vplzcntd, "plzcntd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x44), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX) // Count the number of leading zero bits for packed dword values -INST3(vplzcntq, "plzcntq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x44), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // Count the number of leading zero bits for packed qword values +INST3(vpconflictd, "pconflictd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xC4), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Detect conflicts within a vector of packed dword values into dense memory/register +INST3(vpconflictq, "pconflictq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xC4), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Detect conflicts within a vector of packed qword values into dense memory/register +INST3(vplzcntd, "plzcntd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x44), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Count the number of leading zero bits for packed dword values +INST3(vplzcntq, "plzcntq", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x44), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // Count the number of leading zero bits for packed qword values // AVX512DQ -INST3(kaddb, "kaddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x4A), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Add two masks -INST3(kaddw, "kaddw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4A), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Add two masks -INST3(kandb, "kandb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x41), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical AND masks -INST3(kandnb, "kandnb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x42), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical AND NOT masks -INST3(kmovb_gpr, "kmovb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers -INST3(kmovb_msk, "kmovb", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers -INST3(knotb, "knotb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x44), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // NOT mask register -INST3(korb, "korb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x45), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical OR masks -INST3(kortestb, "kortestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags -INST3(kshiftlb, "kshiftlb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x32), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift left mask registers -INST3(kshiftrb, "kshiftrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x30), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift right mask registers -INST3(ktestb, "ktestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x99), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags -INST3(ktestw, "ktestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x99), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags -INST3(kxnorb, "kxnorb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x46), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical XNOR masks -INST3(kxorb, "kxorb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x47), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical XOR masks +INST3(kaddb, "kaddb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x4A), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Add two masks +INST3(kaddw, "kaddw", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x4A), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Add two masks +INST3(kandb, "kandb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x41), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical AND masks +INST3(kandnb, "kandnb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x42), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical AND NOT masks +INST3(kmovb_gpr, "kmovb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x92), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(kmovb_msk, "kmovb", IUM_WR, PCKDBL(0x91), BAD_CODE, PCKDBL(0x90), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Move from and to mask registers +INST3(knotb, "knotb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x44), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // NOT mask register +INST3(korb, "korb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x45), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical OR masks +INST3(kortestb, "kortestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x98), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // OR masks and set flags +INST3(kshiftlb, "kshiftlb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x32), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift left mask registers +INST3(kshiftrb, "kshiftrb", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x30), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Shift right mask registers +INST3(ktestb, "ktestb", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x99), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags +INST3(ktestw, "ktestw", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x99), INS_TT_NONE, REX_W0 | Encoding_VEX | Resets_OF | Resets_SF | Writes_ZF | Resets_AF | Resets_PF | Writes_CF | KInstruction) // Packed bit test masks and set flags +INST3(kxnorb, "kxnorb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x46), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical XNOR masks +INST3(kxorb, "kxorb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x47), INS_TT_NONE, REX_W0 | Encoding_VEX | KInstruction) // Bitwise logical XOR masks INST3(vbroadcastf32x2, "broadcastf32x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x19), INS_TT_TUPLE2, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register INST3(vbroadcasti32x2, "broadcasti32x2", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x59), INS_TT_TUPLE2, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register INST3(vbroadcastf32x8, "broadcastf32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x1B), INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast packed float values read from memory to entire register INST3(vbroadcasti32x8, "broadcasti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x5B), INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX) // Broadcast packed integer values read from memory to entire register -INST3(vcvtpd2qq, "cvtpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt packed doubles to signed QWORDs -INST3(vcvtpd2uqq, "cvtpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt packed doubles to unsigned QWORDs -INST3(vcvtps2qq, "cvtps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed singles to signed QWORDs -INST3(vcvtps2uqq, "cvtps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt packed singles to unsigned QWORDs -INST3(vcvtqq2pd, "cvtqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to doubles -INST3(vcvtqq2ps, "cvtqq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to singles -INST3(vcvttpd2qq, "cvttpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to signed QWORDs -INST3(vcvttpd2uqq, "cvttpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt w/ truncation packed doubles to unsigned QWORDs -INST3(vcvttps2qq, "cvttps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to signed QWORDs -INST3(vcvttps2uqq, "cvttps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX) // cvt w/ truncation packed singles to unsigned QWORDs -INST3(vcvtuqq2pd, "cvtuqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to doubles -INST3(vcvtuqq2ps, "cvtuqq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX) // cvt packed signed QWORDs to singles +INST3(vcvtpd2qq, "cvtpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed doubles to signed QWORDs +INST3(vcvtpd2uqq, "cvtpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed doubles to unsigned QWORDs +INST3(vcvtps2qq, "cvtps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7B), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed singles to signed QWORDs +INST3(vcvtps2uqq, "cvtps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x79), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed singles to unsigned QWORDs +INST3(vcvtqq2pd, "cvtqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed signed QWORDs to doubles +INST3(vcvtqq2ps, "cvtqq2ps", IUM_WR, BAD_CODE, BAD_CODE, PCKFLT(0x5B), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed signed QWORDs to singles +INST3(vcvttpd2qq, "cvttpd2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt w/ truncation packed doubles to signed QWORDs +INST3(vcvttpd2uqq, "cvttpd2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt w/ truncation packed doubles to unsigned QWORDs +INST3(vcvttps2qq, "cvttps2qq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7A), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt w/ truncation packed singles to signed QWORDs +INST3(vcvttps2uqq, "cvttps2uqq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x78), INS_TT_HALF, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt w/ truncation packed singles to unsigned QWORDs +INST3(vcvtuqq2pd, "cvtuqq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0x7A), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed signed QWORDs to doubles +INST3(vcvtuqq2ps, "cvtuqq2ps", IUM_WR, BAD_CODE, BAD_CODE, SSEDBL(0x7A), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_EmbeddedBroadcastSupported) // cvt packed signed QWORDs to singles INST3(vextractf32x8, "extractf32x8", IUM_WR, SSE3A(0x1B), BAD_CODE, BAD_CODE, INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values INST3(vextractf64x2, "extractf64x2", IUM_WR, SSE3A(0x19), BAD_CODE, BAD_CODE, INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX) // Extract 256-bit packed double-precision floating point values INST3(vextracti32x8, "extracti32x8", IUM_WR, SSE3A(0x3B), BAD_CODE, BAD_CODE, INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX) // Extract 256-bit packed quadword integer values @@ -863,10 +863,10 @@ INST3(vinsertf32x8, "insertf32x8", IUM_WR, BAD_CODE, BAD_ INST3(vinsertf64x2, "insertf64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x18), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed double-precision floating point values INST3(vinserti32x8, "inserti32x8", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x3A), INS_TT_TUPLE8, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values INST3(vinserti64x2, "inserti64x2", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x38), INS_TT_TUPLE2, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Insert 256-bit packed quadword integer values -INST3(vpcmpd, "pcmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) -INST3(vpcmpq, "pcmpq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) -INST3(vpcmpud, "pcmpud", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) -INST3(vpcmpuq, "pcmpuq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask) +INST3(vpcmpd, "pcmpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask | INS_Flags_EmbeddedBroadcastSupported) +INST3(vpcmpq, "pcmpq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1F), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask | INS_Flags_EmbeddedBroadcastSupported) +INST3(vpcmpud, "pcmpud", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), INS_TT_FULL, Input_32Bit | REX_W0 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask | INS_Flags_EmbeddedBroadcastSupported) +INST3(vpcmpuq, "pcmpuq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x1E), INS_TT_FULL, Input_64Bit | REX_W1 | Encoding_EVEX | INS_Flags_Is3OperandInstructionMask | INS_Flags_EmbeddedBroadcastSupported) INST3(vpmovd2m, "pmovd2m", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x39), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_EVEX) INST3(vpmovm2d, "pmovm2d", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x38), INS_TT_NONE, Input_32Bit | REX_W0 | Encoding_EVEX) INST3(vpmovm2q, "pmovm2q", IUM_WR, BAD_CODE, BAD_CODE, PSSE38(0xF3, 0x38), INS_TT_NONE, Input_64Bit | REX_W1 | Encoding_EVEX) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index aa645c5b4d7411..e8a7846d0a857a 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -360,10 +360,12 @@ RELEASE_CONFIG_INTEGER(EnableMultiRegLocals, W("EnableMultiRegLocals"), 1) // Disables inlining of all methods RELEASE_CONFIG_INTEGER(JitNoInline, W("JitNoInline"), 0) +// clang-format off + +#if defined(TARGET_AMD64) || defined(TARGET_X86) // Enable EVEX encoding for SIMD instructions when AVX-512VL is available. CONFIG_INTEGER(JitStressEvexEncoding, W("JitStressEvexEncoding"), 0) - -// clang-format off +#endif RELEASE_CONFIG_INTEGER(PreferredVectorBitWidth, W("PreferredVectorBitWidth"), 0) // The preferred decimal width, in bits, to use for any implicit vectorization emitted. A value less than 128 is treated as the system default. @@ -420,6 +422,9 @@ RELEASE_CONFIG_INTEGER(EnableArm64Sha256, W("EnableArm64Sha256"), RELEASE_CONFIG_INTEGER(EnableArm64Sve, W("EnableArm64Sve"), 1) // Allows Arm64 Sve+ hardware intrinsics to be disabled #endif +RELEASE_CONFIG_INTEGER(EnableEmbeddedBroadcast, W("EnableEmbeddedBroadcast"), 1) // Allows embedded broadcasts to be disabled +RELEASE_CONFIG_INTEGER(EnableEmbeddedMasking, W("EnableEmbeddedMasking"), 1) // Allows embedded masking to be disabled + // clang-format on #ifdef FEATURE_SIMD diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 156992b89e9d46..dd2bc51ffcfa24 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -1919,6 +1919,68 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } // pre-AVX doesn't actually support these intrinsics in hardware so we need to swap the operands around + NamedIntrinsic newIntrinsicId = NI_Illegal; + + switch (intrinsicId) + { + case NI_SSE_CompareGreaterThan: + { + newIntrinsicId = NI_SSE_CompareLessThan; + break; + } + + case NI_SSE_CompareGreaterThanOrEqual: + { + newIntrinsicId = NI_SSE_CompareLessThanOrEqual; + break; + } + + case NI_SSE_CompareNotGreaterThan: + { + newIntrinsicId = NI_SSE_CompareNotLessThan; + break; + } + + case NI_SSE_CompareNotGreaterThanOrEqual: + { + newIntrinsicId = NI_SSE_CompareNotLessThanOrEqual; + break; + } + + case NI_SSE2_CompareGreaterThan: + { + newIntrinsicId = NI_SSE2_CompareLessThan; + break; + } + + case NI_SSE2_CompareGreaterThanOrEqual: + { + newIntrinsicId = NI_SSE2_CompareLessThanOrEqual; + break; + } + + case NI_SSE2_CompareNotGreaterThan: + { + newIntrinsicId = NI_SSE2_CompareNotLessThan; + break; + } + + case NI_SSE2_CompareNotGreaterThanOrEqual: + { + newIntrinsicId = NI_SSE2_CompareNotLessThanOrEqual; + break; + } + + default: + { + unreached(); + } + } + + assert(newIntrinsicId != NI_Illegal); + assert(intrinsicId != newIntrinsicId); + + node->ChangeHWIntrinsicId(newIntrinsicId); std::swap(node->Op(1), node->Op(2)); break; } @@ -1933,7 +1995,39 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) } assert(varTypeIsIntegral(node->GetSimdBaseType())); - // this isn't actually supported in hardware so we need to swap the operands around + // pre-AVX512 doesn't actually support these intrinsics in hardware so we need to swap the operands around + NamedIntrinsic newIntrinsicId = NI_Illegal; + + switch (intrinsicId) + { + case NI_SSE2_CompareLessThan: + { + newIntrinsicId = NI_SSE2_CompareGreaterThan; + break; + } + + case NI_SSE42_CompareLessThan: + { + newIntrinsicId = NI_SSE42_CompareGreaterThan; + break; + } + + case NI_AVX2_CompareLessThan: + { + newIntrinsicId = NI_AVX2_CompareGreaterThan; + break; + } + + default: + { + unreached(); + } + } + + assert(newIntrinsicId != NI_Illegal); + assert(intrinsicId != newIntrinsicId); + + node->ChangeHWIntrinsicId(newIntrinsicId); std::swap(node->Op(1), node->Op(2)); break; } @@ -2159,12 +2253,18 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm // node = * HWINTRINSIC simd T op_Equality GenTree* op1 = node->Op(1); + GenTree* op1Msk = op1; GenTree* op2 = node->Op(2); GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::EQ : GenCondition::NE; + if (op1Msk->OperIsHWIntrinsic(NI_AVX512F_ConvertMaskToVector)) + { + op1Msk = op1Msk->AsHWIntrinsic()->Op(1); + assert(varTypeIsMask(op1Msk)); + } + if (!varTypeIsFloating(simdBaseType) && (simdSize != 64) && op2->IsVectorZero() && - comp->compOpportunisticallyDependsOn(InstructionSet_SSE41) && - !op1->OperIsHWIntrinsic(NI_AVX512F_ConvertMaskToVector)) + comp->compOpportunisticallyDependsOn(InstructionSet_SSE41) && !varTypeIsMask(op1Msk)) { // On SSE4.1 or higher we can optimize comparisons against zero to // just use PTEST. We can't support it for floating-point, however, @@ -2358,7 +2458,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm // This works since the upper bits are implicitly zero and so by inverting matches also become // zero, which in turn means that `AllBitsSet` will become `Zero` and other cases become non-zero - if (op1->OperIsHWIntrinsic(NI_AVX512F_ConvertMaskToVector) && op2->IsCnsVec()) + if (varTypeIsMask(op1Msk) && op2->IsCnsVec()) { // We want to specially handle the common cases of `mask op Zero` and `mask op AllBitsSet` // @@ -2367,7 +2467,7 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm // simply consume the mask directly and preserve the intended comparison by tweaking the // compare condition passed down into `KORTEST` - maskNode = op1->AsHWIntrinsic()->Op(1); + maskNode = op1Msk; assert(maskNode->TypeIs(TYP_MASK)); bool isHandled = false; @@ -2550,7 +2650,12 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm } BlockRange().Remove(op2); - BlockRange().Remove(op1); + + if (op1Msk != op1) + { + BlockRange().Remove(op1); + } + BlockRange().Remove(node); op1 = nullptr; @@ -3089,15 +3194,151 @@ GenTree* Lowering::LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node) } } - if (!condition->OperIsHWIntrinsic(NI_AVX512F_ConvertMaskToVector)) + if (condition->OperIsHWIntrinsic(NI_AVX512F_ConvertMaskToVector)) { - break; + GenTree* tmp = condition->AsHWIntrinsic()->Op(1); + BlockRange().Remove(condition); + condition = tmp; } + else if (!varTypeIsMask(condition)) + { + if (!condition->OperIsHWIntrinsic()) + { + break; + } + + GenTreeHWIntrinsic* cndNode = condition->AsHWIntrinsic(); + NamedIntrinsic cndId = cndNode->GetHWIntrinsicId(); + + switch (cndId) + { + case NI_AVX_Compare: + { + cndId = NI_AVX512F_CompareMask; + break; + } + + case NI_SSE_CompareEqual: + case NI_SSE2_CompareEqual: + case NI_SSE41_CompareEqual: + case NI_AVX_CompareEqual: + case NI_AVX2_CompareEqual: + { + cndId = NI_AVX512F_CompareEqualMask; + break; + } + + case NI_SSE_CompareGreaterThan: + case NI_SSE2_CompareGreaterThan: + case NI_SSE42_CompareGreaterThan: + case NI_AVX_CompareGreaterThan: + case NI_AVX2_CompareGreaterThan: + { + cndId = NI_AVX512F_CompareGreaterThanMask; + break; + } + + case NI_SSE_CompareGreaterThanOrEqual: + case NI_SSE2_CompareGreaterThanOrEqual: + case NI_AVX_CompareGreaterThanOrEqual: + { + cndId = NI_AVX512F_CompareGreaterThanOrEqualMask; + break; + } + + case NI_SSE_CompareLessThan: + case NI_SSE2_CompareLessThan: + case NI_SSE42_CompareLessThan: + case NI_AVX_CompareLessThan: + case NI_AVX2_CompareLessThan: + { + cndId = NI_AVX512F_CompareLessThanMask; + break; + } + + case NI_SSE_CompareLessThanOrEqual: + case NI_SSE2_CompareLessThanOrEqual: + case NI_AVX_CompareLessThanOrEqual: + { + cndId = NI_AVX512F_CompareLessThanOrEqualMask; + break; + } + + case NI_SSE_CompareNotEqual: + case NI_SSE2_CompareNotEqual: + case NI_AVX_CompareNotEqual: + { + cndId = NI_AVX512F_CompareNotEqualMask; + break; + } + + case NI_SSE_CompareNotGreaterThan: + case NI_SSE2_CompareNotGreaterThan: + case NI_AVX_CompareNotGreaterThan: + { + cndId = NI_AVX512F_CompareGreaterThanMask; + break; + } + + case NI_SSE_CompareNotGreaterThanOrEqual: + case NI_SSE2_CompareNotGreaterThanOrEqual: + case NI_AVX_CompareNotGreaterThanOrEqual: + { + cndId = NI_AVX512F_CompareNotGreaterThanOrEqualMask; + break; + } + + case NI_SSE_CompareNotLessThan: + case NI_SSE2_CompareNotLessThan: + case NI_AVX_CompareNotLessThan: + { + cndId = NI_AVX512F_CompareNotLessThanMask; + break; + } + + case NI_SSE_CompareNotLessThanOrEqual: + case NI_SSE2_CompareNotLessThanOrEqual: + case NI_AVX_CompareNotLessThanOrEqual: + { + cndId = NI_AVX512F_CompareNotLessThanOrEqualMask; + break; + } + + case NI_SSE_CompareOrdered: + case NI_SSE2_CompareOrdered: + case NI_AVX_CompareOrdered: + { + cndId = NI_AVX512F_CompareOrderedMask; + break; + } - node->ResetHWIntrinsicId(NI_AVX512F_BlendVariableMask, comp, selectFalse, selectTrue, - condition->AsHWIntrinsic()->Op(1)); + case NI_SSE_CompareUnordered: + case NI_SSE2_CompareUnordered: + case NI_AVX_CompareUnordered: + { + cndId = NI_AVX512F_CompareUnorderedMask; + break; + } + + default: + { + assert(!HWIntrinsicInfo::ReturnsPerElementMask(cndId)); + cndId = NI_Illegal; + break; + } + } + + if (cndId == NI_Illegal) + { + break; + } + + cndNode->gtType = TYP_MASK; + cndNode->ChangeHWIntrinsicId(cndId); + } - BlockRange().Remove(condition); + assert(varTypeIsMask(condition)); + node->ResetHWIntrinsicId(NI_AVX512F_BlendVariableMask, comp, selectFalse, selectTrue, condition); BlockRange().Remove(op4); break; } @@ -8368,6 +8609,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre case NI_AVX2_ShuffleLow: case NI_AVX512F_AlignRight32: case NI_AVX512F_AlignRight64: + case NI_AVX512F_CompareMask: case NI_AVX512F_Fixup: case NI_AVX512F_GetMantissa: case NI_AVX512F_Permute2x64: @@ -8836,18 +9078,16 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre return false; } - case NI_Vector128_get_Zero: - case NI_Vector256_get_Zero: - { - // These are only containable as part of Sse41.Insert - return false; - } - case NI_SSE3_MoveAndDuplicate: case NI_AVX2_BroadcastScalarToVector128: case NI_AVX2_BroadcastScalarToVector256: case NI_AVX512F_BroadcastScalarToVector512: { + if (comp->opts.MinOpts() || !comp->canUseEmbeddedBroadcast()) + { + return false; + } + var_types parentBaseType = parentNode->GetSimdBaseType(); var_types childBaseType = hwintrinsic->GetSimdBaseType(); @@ -8930,8 +9170,15 @@ void Lowering::TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode, var_types simdType = parentNode->TypeGet(); var_types simdBaseType = parentNode->GetSimdBaseType(); CorInfoType simdBaseJitType = parentNode->GetSimdBaseJitType(); + unsigned simdSize = parentNode->GetSimdSize(); bool isCreatedFromScalar = true; - int elementCount = GenTreeVecCon::ElementCount(genTypeSize(simdType), simdBaseType); + + if (simdType == TYP_MASK) + { + simdType = Compiler::getSIMDTypeForSize(simdSize); + } + int elementCount = GenTreeVecCon::ElementCount(genTypeSize(simdType), simdBaseType); + switch (simdBaseType) { case TYP_FLOAT: @@ -10143,7 +10390,20 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) // contained and not a memory operand and know to invoke the special handling // so that the embedded masking can work as expected. + bool isEmbeddedMask = false; + if (op2->isEmbeddedMaskingCompatibleHWIntrinsic()) + { + isEmbeddedMask = !comp->opts.MinOpts() && comp->canUseEmbeddedMasking(); + + if (op2->isRMWHWIntrinsic(comp)) + { + // TODO-AVX512-CQ: Ensure we can support embedded operations on RMW intrinsics + isEmbeddedMask = false; + } + } + + if (isEmbeddedMask) { uint32_t maskSize = genTypeSize(simdBaseType); uint32_t operSize = genTypeSize(op2->AsHWIntrinsic()->GetSimdBaseType()); @@ -10157,6 +10417,8 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) { // When we are merging with zero, we can specialize // and avoid instantiating the vector constant. + + assert(!op2->TypeIs(TYP_MASK)); MakeSrcContained(node, op1); } break; @@ -10286,6 +10548,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AVX2_Permute2x128: case NI_AVX512F_AlignRight32: case NI_AVX512F_AlignRight64: + case NI_AVX512F_CompareMask: case NI_AVX512F_GetMantissaScalar: case NI_AVX512F_InsertVector128: case NI_AVX512F_InsertVector256: diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 41b375acd43929..a05e880659db9c 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -3605,7 +3605,7 @@ int LinearScan::BuildDelayFreeUses(GenTree* node, else if (node->OperIsHWIntrinsic()) { assert(node->AsHWIntrinsic()->GetOperandCount() == 1); - use = BuildUse(node->AsHWIntrinsic()->Op(1), candidates); + return BuildDelayFreeUses(node->AsHWIntrinsic()->Op(1), rmwNode, candidates, useRefPositionRef); } #endif else if (!node->OperIsIndir()) diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 3d19214f8acbdd..8d376cff4e725c 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -2543,7 +2543,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou for (GenTree* operand : op2->AsHWIntrinsic()->Operands()) { - assert(varTypeIsSIMD(operand)); + assert(varTypeIsSIMD(operand) || varTypeIsInt(operand)); srcCount += BuildDelayFreeUses(operand, op1); } } @@ -2556,7 +2556,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou for (GenTree* operand : op2->AsHWIntrinsic()->Operands()) { - assert(varTypeIsSIMD(operand)); + assert(varTypeIsSIMD(operand) || varTypeIsInt(operand)); srcCount += BuildOperandUses(operand); } } @@ -2789,10 +2789,11 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (dstCount == 1) { #if defined(TARGET_AMD64) - if (!intrinsicTree->isEvexCompatibleHWIntrinsic() && - (varTypeIsFloating(intrinsicTree->gtType) || varTypeIsSIMD(intrinsicTree->gtType))) + bool isEvexCompatible = intrinsicTree->isEvexCompatibleHWIntrinsic(); + + if (!isEvexCompatible) { - dstCandidates = lowSIMDRegs(); + dstCandidates = BuildEvexIncompatibleMask(intrinsicTree); } #endif @@ -3101,7 +3102,9 @@ void LinearScan::SetContainsAVXFlags(unsigned sizeOfSIMDVector /* = 0*/) inline regMaskTP LinearScan::BuildEvexIncompatibleMask(GenTree* tree) { #if defined(TARGET_AMD64) - if (!(varTypeIsFloating(tree->gtType) || varTypeIsSIMD(tree->gtType))) + assert(!varTypeIsMask(tree)); + + if (!varTypeIsFloating(tree->gtType) && !varTypeIsSIMD(tree->gtType)) { return RBM_NONE; } diff --git a/src/coreclr/jit/simdcodegenxarch.cpp b/src/coreclr/jit/simdcodegenxarch.cpp index aff087062f58fe..b344460a100abb 100644 --- a/src/coreclr/jit/simdcodegenxarch.cpp +++ b/src/coreclr/jit/simdcodegenxarch.cpp @@ -168,7 +168,7 @@ void CodeGen::genLoadIndTypeSimd12(GenTreeIndir* treeNode) { // Load and insert upper 4 bytes, 0x20 inserts to index 2 and 0x8 zeros index 3 GenTreeIndir indir = indirForm(TYP_SIMD16, addr); - emit->emitIns_SIMD_R_R_A_I(INS_insertps, EA_16BYTE, tgtReg, tgtReg, &indir, 0x28); + emit->emitIns_SIMD_R_R_A_I(INS_insertps, EA_16BYTE, tgtReg, tgtReg, &indir, 0x28, INS_OPTS_NONE); } else { @@ -323,7 +323,7 @@ void CodeGen::genEmitLoadLclTypeSimd12(regNumber tgtReg, unsigned lclNum, unsign emit->emitIns_R_S(INS_movsd_simd, EA_8BYTE, tgtReg, lclNum, offset); // Load and insert upper 4 byte, 0x20 inserts to index 2 and 0x8 zeros index 3 - emit->emitIns_SIMD_R_R_S_I(INS_insertps, EA_16BYTE, tgtReg, tgtReg, lclNum, offset + 8, 0x28); + emit->emitIns_SIMD_R_R_S_I(INS_insertps, EA_16BYTE, tgtReg, tgtReg, lclNum, offset + 8, 0x28, INS_OPTS_NONE); } else { @@ -530,7 +530,8 @@ void CodeGen::genSimd12UpperClear(regNumber tgtReg) // COUNT_D: 0b11 - Insert into element 3 // COUNT_S: 0b11 - Insert from element 3 - GetEmitter()->emitIns_SIMD_R_R_R_I(INS_insertps, EA_16BYTE, tgtReg, tgtReg, tgtReg, static_cast(0xF8)); + GetEmitter()->emitIns_SIMD_R_R_R_I(INS_insertps, EA_16BYTE, tgtReg, tgtReg, tgtReg, static_cast(0xF8), + INS_OPTS_NONE); } else { @@ -548,7 +549,7 @@ void CodeGen::genSimd12UpperClear(regNumber tgtReg) zroSimd12Elm3 = GetEmitter()->emitSimd16Const(constValue); } - GetEmitter()->emitIns_SIMD_R_R_C(INS_andps, EA_16BYTE, tgtReg, tgtReg, zroSimd12Elm3, 0); + GetEmitter()->emitIns_SIMD_R_R_C(INS_andps, EA_16BYTE, tgtReg, tgtReg, zroSimd12Elm3, 0, INS_OPTS_NONE); } } diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs index 20b8942132da02..ed85a8453a61cf 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_X86.cs @@ -22,21 +22,37 @@ // * outputDirectory - This should be somewhere under the obj folder for the project and is where generated tests are written // * testListFileName - This should likewise be somewhere under the obj folder and is where the list of generated tests is written -const string AlternatingOpTest_ValidationLogic = @"for (var i = 0; i < RetElementCount; i += 2) +const string AlternatingBinOpTest_ValidationLogic = @"for (var i = 0; i < RetElementCount; i += 2) { - if ({ValidateFirstResult}) + if ({ValidateFirstResult} && (!mask.HasValue || ((left[i] != default) && result[i] != mask.Value))) { succeeded = false; break; } - if ({ValidateRemainingResults}) + if ({ValidateRemainingResults} && (!mask.HasValue || ((left[i + 1] != default) && result[i + 1] != mask.Value))) { succeeded = false; break; } }"; +const string AlternatingTernOpTest_ValidationLogic = @"for (var i = 0; i < RetElementCount; i += 2) + { + if ({ValidateFirstResult} && (!mask.HasValue || ((firstOp[0] != default) && result[0] != mask.Value))) + { + succeeded = false; + break; + } + + if ({ValidateRemainingResults} && (!mask.HasValue || ((firstOp[i] != default) && result[i] != mask.Value))) + { + succeeded = false; + break; + } + }"; + + const string BooleanCmpTest_ValidationLogic = @"if ({ValidateFirstResult}) { succeeded = false; @@ -75,13 +91,13 @@ var i2 = i1 + (8 / sizeof({RetBaseType})); var i3 = (outer * (16 / sizeof({RetBaseType}))) + (inner * 2); - if ({ValidateFirstResult}) + if ({ValidateFirstResult} && (!mask.HasValue || ((left[i1] != default) && result[i1] != mask.Value))) { succeeded = false; break; } - if ({ValidateRemainingResults}) + if ({ValidateRemainingResults} && (!mask.HasValue || ((left[i2] != default) && result[i2] != mask.Value))) { succeeded = false; break; @@ -89,7 +105,7 @@ } }"; -const string SimpleOpTest_ValidationLogic = @"if ({ValidateFirstResult}) +const string SimpleUnOpTest_ValidationLogic = @"if ({ValidateFirstResult} && (!mask.HasValue || ((firstOp[0] != default) && result[0] != mask.Value))) { succeeded = false; } @@ -97,7 +113,39 @@ { for (var i = 1; i < RetElementCount; i++) { - if ({ValidateRemainingResults}) + if ({ValidateRemainingResults} && (!mask.HasValue || ((firstOp[i] != default) && result[i] != mask.Value))) + { + succeeded = false; + break; + } + } + }"; + +const string SimpleBinOpTest_ValidationLogic = @"if ({ValidateFirstResult} && (!mask.HasValue || ((left[0] != default) && result[0] != mask.Value))) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults} && (!mask.HasValue || ((left[i] != default) && result[i] != mask.Value))) + { + succeeded = false; + break; + } + } + }"; + +const string SimpleTernOpTest_ValidationLogic = @"if ({ValidateFirstResult} && (!mask.HasValue || ((firstOp[0] != default) && result[0] != mask.Value))) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults} && (!mask.HasValue || ((firstOp[i] != default) && result[i] != mask.Value))) { succeeded = false; break; @@ -107,16 +155,16 @@ (string templateFileName, string outputTemplateName, Dictionary templateData)[] Templates = new[] { - ("_BinaryOpTestTemplate.template", "AlternatingBinOpTest.template", new Dictionary { ["TemplateName"] = "Alternating", ["TemplateValidationLogic"] = AlternatingOpTest_ValidationLogic }), + ("_BinaryOpTestTemplate.template", "AlternatingBinOpTest.template", new Dictionary { ["TemplateName"] = "Alternating", ["TemplateValidationLogic"] = AlternatingBinOpTest_ValidationLogic }), ("_BinaryOpTestTemplate.template", "HorizontalBinOpTest.template", new Dictionary { ["TemplateName"] = "Horizontal", ["TemplateValidationLogic"] = HorizontalOpTest_ValidationLogic }), - ("_BinaryOpTestTemplate.template", "SimpleBinOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleOpTest_ValidationLogic }), + ("_BinaryOpTestTemplate.template", "SimpleBinOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleBinOpTest_ValidationLogic }), ("_BooleanBinaryOpTestTemplate.template", "BooleanBinOpTest.template", new Dictionary { ["TemplateName"] = "Boolean", ["TemplateValidationLogic"] = BooleanOpTest_ValidationLogic }), ("_BooleanBinaryOpTestTemplate.template", "BooleanCmpOpTest.template", new Dictionary { ["TemplateName"] = "Boolean", ["TemplateValidationLogic"] = BooleanCmpTest_ValidationLogic }), ("_BooleanBinaryOpTestTemplate.template", "BooleanTwoCmpOpTest.template", new Dictionary { ["TemplateName"] = "Boolean", ["TemplateValidationLogic"] = BooleanTwoCmpTest_ValidationLogic }), ("_BooleanUnaryOpTestTemplate.template", "BooleanUnOpTest.template", new Dictionary { ["TemplateName"] = "Boolean", ["TemplateValidationLogic"] = BooleanOpTest_ValidationLogic }), - ("_TernaryOpTestTemplate.template", "AlternatingTernOpTest.template", new Dictionary { ["TemplateName"] = "Alternating", ["TemplateValidationLogic"] = AlternatingOpTest_ValidationLogic }), - ("_TernaryOpTestTemplate.template", "SimpleTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleOpTest_ValidationLogic }), - ("_UnaryOpTestTemplate.template", "SimpleUnOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleOpTest_ValidationLogic }), + ("_TernaryOpTestTemplate.template", "AlternatingTernOpTest.template", new Dictionary { ["TemplateName"] = "Alternating", ["TemplateValidationLogic"] = AlternatingTernOpTest_ValidationLogic }), + ("_TernaryOpTestTemplate.template", "SimpleTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleTernOpTest_ValidationLogic }), + ("_UnaryOpTestTemplate.template", "SimpleUnOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleUnOpTest_ValidationLogic }), }; (string templateFileName, Dictionary templateData)[] Sse1Inputs = new [] @@ -331,9 +379,9 @@ ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "Or", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "(ushort)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(ushort)(left[i] | right[i]) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "Or", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "(uint)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(uint)(left[i] | right[i]) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "Or", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "(ulong)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(ulong)(left[i] | right[i]) != result[i]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "PackSignedSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != (short)Math.Min(Math.Max(left[0], short.MinValue), short.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i < 4) ? (short)Math.Min(Math.Max(left[i], short.MinValue), short.MaxValue) : (short)Math.Min(Math.Max(right[i&3], short.MinValue), short.MaxValue))"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "PackSignedSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (sbyte)Math.Min(Math.Max(left[0], sbyte.MinValue), sbyte.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i < 8) ? (sbyte)Math.Min(Math.Max(left[i], sbyte.MinValue), sbyte.MaxValue) : (sbyte)Math.Min(Math.Max(right[i&7], sbyte.MinValue), sbyte.MaxValue))"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "PackUnsignedSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (byte)Math.Min(Math.Max(left[0], byte.MinValue), byte.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i < 8) ? (byte)Math.Min(Math.Max(left[i], byte.MinValue), byte.MaxValue) : (byte)Math.Min(Math.Max(right[i&7], byte.MinValue), byte.MaxValue))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "PackSignedSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != (short)Math.Min(Math.Max(left[0], short.MinValue), short.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i < 4) ? (short)Math.Min(Math.Max(left[i], short.MinValue), short.MaxValue) : (short)Math.Min(Math.Max(right[i&3], short.MinValue), short.MaxValue))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "PackSignedSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (sbyte)Math.Min(Math.Max(left[0], sbyte.MinValue), sbyte.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i < 8) ? (sbyte)Math.Min(Math.Max(left[i], sbyte.MinValue), sbyte.MaxValue) : (sbyte)Math.Min(Math.Max(right[i&7], sbyte.MinValue), sbyte.MaxValue))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "PackUnsignedSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (byte)Math.Min(Math.Max(left[0], byte.MinValue), byte.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i < 8) ? (byte)Math.Min(Math.Max(left[i], byte.MinValue), byte.MaxValue) : (byte)Math.Min(Math.Max(right[i&7], byte.MinValue), byte.MaxValue))"}), ("ImmUnOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "ShiftLeftLogical", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["Imm"] = "1", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "(short)(firstOp[0] << 1) != result[0]", ["ValidateRemainingResults"] = "(short)(firstOp[i] << 1) != result[i]"}), ("ImmUnOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "ShiftLeftLogical", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["Imm"] = "1", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "(ushort)(firstOp[0] << 1) != result[0]", ["ValidateRemainingResults"] = "(ushort)(firstOp[i] << 1) != result[i]"}), ("ImmUnOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "ShiftLeftLogical", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Imm"] = "1", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "(int)(firstOp[0] << 1) != result[0]", ["ValidateRemainingResults"] = "(int)(firstOp[i] << 1) != result[i]"}), @@ -409,24 +457,24 @@ ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "SubtractSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "SseVerify.SubtractSaturate(left[0], right[0], result[0])", ["ValidateRemainingResults"] = "SseVerify.SubtractSaturate(left[i], right[i], result[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "SubtractScalar", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(left[0] - right[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(left[i]) != BitConverter.DoubleToInt64Bits(result[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "SumAbsoluteDifferences", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != SseVerify.SumAbsoluteDifferences(left, right, 0)", ["ValidateRemainingResults"] = "result[i] != SseVerify.SumAbsoluteDifferences(left, right, i)"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(left[1])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(left[i/2+1]) : BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(right[(i - 1)/2 + 1])"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "result[0] != left[1] || result[1] != right[1]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 1] : result[i] != right[(i - 1)/2 + 1]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "result[0] != left[1] || result[1] != right[1]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 1] : result[i] != right[(i - 1)/2 + 1]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != left[2]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 2] : result[i] != right[(i - 1)/2 + 2]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != left[2]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 2] : result[i] != right[(i - 1)/2 + 2]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != left[4]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 4] : result[i] != right[(i - 1)/2 + 4]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != left[4]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 4] : result[i] != right[(i - 1)/2 + 4]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != left[8]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 8] : result[i] != right[(i - 1)/2 + 8]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != left[8]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 8] : result[i] != right[(i - 1)/2 + 8]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(left[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(left[i/2]) : BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(right[(i - 1)/2])"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "result[0] != left[0] || result[1] != right[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "result[0] != left[0] || result[1] != right[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != left[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != left[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != left[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != left[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != left[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != left[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(left[1])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(left[i/2+1]) : BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(right[(i - 1)/2 + 1])"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "result[0] != left[1] || result[1] != right[1]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 1] : result[i] != right[(i - 1)/2 + 1]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "result[0] != left[1] || result[1] != right[1]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 1] : result[i] != right[(i - 1)/2 + 1]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != left[2]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 2] : result[i] != right[(i - 1)/2 + 2]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != left[2]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 2] : result[i] != right[(i - 1)/2 + 2]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != left[4]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 4] : result[i] != right[(i - 1)/2 + 4]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != left[4]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 4] : result[i] != right[(i - 1)/2 + 4]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != left[8]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 8] : result[i] != right[(i - 1)/2 + 8]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackHigh", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != left[8]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2 + 8] : result[i] != right[(i - 1)/2 + 8]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(left[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(left[i/2]) : BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(right[(i - 1)/2])"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "result[0] != left[0] || result[1] != right[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "result[0] != left[0] || result[1] != right[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != left[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != left[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != left[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != left[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != left[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "UnpackLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != left[0]", ["ValidateRemainingResults"] = "((i & 1) == 0) ? result[i] != left[i/2] : result[i] != right[(i - 1)/2]"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "Xor", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "(BitConverter.DoubleToInt64Bits(left[0]) ^ BitConverter.DoubleToInt64Bits(right[0])) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.DoubleToInt64Bits(left[i]) ^ BitConverter.DoubleToInt64Bits(right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "Xor", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "(byte)(left[0] ^ right[0]) != result[0]", ["ValidateRemainingResults"] = "(byte)(left[i] ^ right[i]) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse2", ["LoadIsa"] = "Sse2", ["Method"] = "Xor", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "(short)(left[0] ^ right[0]) != result[0]", ["ValidateRemainingResults"] = "(short)(left[i] ^ right[i]) != result[i]"}), @@ -497,16 +545,16 @@ (string templateFileName, Dictionary templateData)[] Sse41Inputs = new [] { - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(byte)(((i & 1) == 0) ? 128 : 1)", ["ValidateFirstResult"] = "((thirdOp[0] >> 7) & 1) == 1 ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "((thirdOp[i] >> 7) & 1) == 1 ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "(double)(((i & 1) == 0) ? -0.0 : 1.0)", ["ValidateFirstResult"] = "((BitConverter.DoubleToInt64Bits(thirdOp[0]) >> 63) & 1) == 1 ? BitConverter.DoubleToInt64Bits(secondOp[0]) != BitConverter.DoubleToInt64Bits(result[0]) : BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "((BitConverter.DoubleToInt64Bits(thirdOp[i]) >> 63) & 1) == 1 ? BitConverter.DoubleToInt64Bits(secondOp[i]) != BitConverter.DoubleToInt64Bits(result[i]) : BitConverter.DoubleToInt64Bits(firstOp[i]) != BitConverter.DoubleToInt64Bits(result[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(sbyte)(((i & 1) == 0) ? -128 : 1)", ["ValidateFirstResult"] = "((thirdOp[0] >> 7) & 1) == 1 ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "((thirdOp[i] >> 7) & 1) == 1 ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "(float)(((i & 1) == 0) ? -0.0 : 1.0)", ["ValidateFirstResult"] = "((BitConverter.SingleToInt32Bits(thirdOp[0]) >> 31) & 1) == 1 ? BitConverter.SingleToInt32Bits(secondOp[0]) != BitConverter.SingleToInt32Bits(result[0]) : BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((BitConverter.SingleToInt32Bits(thirdOp[i]) >> 31) & 1) == 1 ? BitConverter.SingleToInt32Bits(secondOp[i]) != BitConverter.SingleToInt32Bits(result[i]) : BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int16", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToInt16(\"0xFFFF\", 16) : (short)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToUInt16(\"0xFFFF\", 16) : (ushort)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToInt32(\"0xFFFFFFFF\", 16) : (int)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToUInt32(\"0xFFFFFFFF\", 16) : (uint)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToInt64(\"0xFFFFFFFFFFFFFFFF\", 16) : (long)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToUInt64(\"0xFFFFFFFFFFFFFFFF\", 16): (ulong)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(byte)(((i & 1) == 0) ? 128 : 1)", ["ValidateFirstResult"] = "((thirdOp[0] >> 7) & 1) == 1 ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "((thirdOp[i] >> 7) & 1) == 1 ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "(double)(((i & 1) == 0) ? -0.0 : 1.0)", ["ValidateFirstResult"] = "((BitConverter.DoubleToInt64Bits(thirdOp[0]) >> 63) & 1) == 1 ? BitConverter.DoubleToInt64Bits(secondOp[0]) != BitConverter.DoubleToInt64Bits(result[0]) : BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "((BitConverter.DoubleToInt64Bits(thirdOp[i]) >> 63) & 1) == 1 ? BitConverter.DoubleToInt64Bits(secondOp[i]) != BitConverter.DoubleToInt64Bits(result[i]) : BitConverter.DoubleToInt64Bits(firstOp[i]) != BitConverter.DoubleToInt64Bits(result[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(sbyte)(((i & 1) == 0) ? -128 : 1)", ["ValidateFirstResult"] = "((thirdOp[0] >> 7) & 1) == 1 ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "((thirdOp[i] >> 7) & 1) == 1 ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "(float)(((i & 1) == 0) ? -0.0 : 1.0)", ["ValidateFirstResult"] = "((BitConverter.SingleToInt32Bits(thirdOp[0]) >> 31) & 1) == 1 ? BitConverter.SingleToInt32Bits(secondOp[0]) != BitConverter.SingleToInt32Bits(result[0]) : BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((BitConverter.SingleToInt32Bits(thirdOp[i]) >> 31) & 1) == 1 ? BitConverter.SingleToInt32Bits(secondOp[i]) != BitConverter.SingleToInt32Bits(result[i]) : BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int16", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToInt16(\"0xFFFF\", 16) : (short)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToUInt16(\"0xFFFF\", 16) : (ushort)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToInt32(\"0xFFFFFFFF\", 16) : (int)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToUInt32(\"0xFFFFFFFF\", 16) : (uint)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToInt64(\"0xFFFFFFFFFFFFFFFF\", 16) : (long)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector128", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToUInt64(\"0xFFFFFFFFFFFFFFFF\", 16): (ulong)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Ceiling", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(Math.Ceiling(firstOp[0]))", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(Math.Ceiling(firstOp[i]))"}), ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse", ["Method"] = "Ceiling", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(MathF.Ceiling(firstOp[0]))", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(MathF.Ceiling(firstOp[i]))"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "CeilingScalar", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(Math.Ceiling(right[0]))", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(left[i])"}), @@ -555,7 +603,7 @@ ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Min", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != Math.Min(left[0], right[0])", ["ValidateRemainingResults"] = "result[i] != Math.Min(left[i], right[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "MultiplyLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != BitConverter.ToInt32(BitConverter.GetBytes(((long)(left[0])) * right[0]), 0)", ["ValidateRemainingResults"] = "result[i] != BitConverter.ToInt32(BitConverter.GetBytes(((long)(left[i])) * right[i]), 0)"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "MultiplyLow", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != BitConverter.ToUInt32(BitConverter.GetBytes(((ulong)(left[0])) * right[0]), 0)", ["ValidateRemainingResults"] = "result[i] != BitConverter.ToUInt32(BitConverter.GetBytes(((ulong)(left[i])) * right[i]), 0)"}), - ("HorizontalBinOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "PackUnsignedSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[i1] != ((left[i3 - inner] > 0xFFFF) ? 0xFFFF : ((left[i3 - inner] < 0) ? 0 : BitConverter.ToUInt16(BitConverter.GetBytes(left[i3 - inner]), 0)))", ["ValidateRemainingResults"] = "result[i2] != ((right[i3 - inner] > 0xFFFF) ? 0xFFFF : ((right[i3 - inner] < 0) ? 0 : BitConverter.ToUInt16(BitConverter.GetBytes(right[i3 - inner]), 0)))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "PackUnsignedSaturate", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != (ushort)Math.Clamp(left[0], ushort.MinValue, ushort.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i < 4) ? (ushort)Math.Clamp(left[i], ushort.MinValue, ushort.MaxValue) : (ushort)Math.Clamp(right[i&3], ushort.MinValue, ushort.MaxValue))"}), ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "RoundCurrentDirection", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(Math.Round(firstOp[0]))", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(Math.Round(firstOp[i]))"}), ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse", ["Method"] = "RoundCurrentDirection", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(MathF.Round(firstOp[0]))", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(MathF.Round(firstOp[i]))"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "RoundCurrentDirectionScalar", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(Math.Round(right[0]))", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(left[i])"}), @@ -635,14 +683,14 @@ { ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "Add", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(left[0] + right[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(left[i] + right[i]) != BitConverter.DoubleToInt64Bits(result[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "Add", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(left[0] + right[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(left[i] + right[i]) != BitConverter.SingleToInt32Bits(result[i])"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "AddSubtract", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(left[0] - right[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(((i & 1) != 0) && (BitConverter.DoubleToInt64Bits(left[i] + right[i]) != BitConverter.DoubleToInt64Bits(result[i]))) || (((i & 1) == 0) && (BitConverter.DoubleToInt64Bits(left[i] - right[i]) != BitConverter.DoubleToInt64Bits(result[i])))"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "AddSubtract", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(left[0] - right[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "(((i & 1) != 0) && (BitConverter.SingleToInt32Bits(left[i] + right[i]) != BitConverter.SingleToInt32Bits(result[i]))) || (((i & 1) == 0) && (BitConverter.SingleToInt32Bits(left[i] - right[i]) != BitConverter.SingleToInt32Bits(result[i])))"}), + ("AlternatingBinOpTest.template",new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "AddSubtract", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(left[i] - right[i]) != BitConverter.DoubleToInt64Bits(result[i])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(left[i + 1] + right[i + 1]) != BitConverter.DoubleToInt64Bits(result[i + 1])"}), + ("AlternatingBinOpTest.template",new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "AddSubtract", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(left[i] - right[i]) != BitConverter.SingleToInt32Bits(result[i])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(left[i + 1] + right[i + 1]) != BitConverter.SingleToInt32Bits(result[i + 1])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "And", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "(BitConverter.DoubleToInt64Bits(left[0]) & BitConverter.DoubleToInt64Bits(right[0])) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.DoubleToInt64Bits(left[i]) & BitConverter.DoubleToInt64Bits(right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "And", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "(BitConverter.SingleToInt32Bits(left[0]) & BitConverter.SingleToInt32Bits(right[0])) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(left[i]) & BitConverter.SingleToInt32Bits(right[i])) != BitConverter.SingleToInt32Bits(result[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "AndNot", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "((~BitConverter.DoubleToInt64Bits(left[0])) & BitConverter.DoubleToInt64Bits(right[0])) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "((~BitConverter.DoubleToInt64Bits(left[i])) & BitConverter.DoubleToInt64Bits(right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "AndNot", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "((~BitConverter.SingleToInt32Bits(left[0])) & BitConverter.SingleToInt32Bits(right[0])) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((~BitConverter.SingleToInt32Bits(left[i])) & BitConverter.SingleToInt32Bits(right[i])) != BitConverter.SingleToInt32Bits(result[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "(double)(((i & 1) == 0) ? -0.0 : 1.0)", ["ValidateFirstResult"] = "((BitConverter.DoubleToInt64Bits(thirdOp[0]) >> 63) & 1) == 1 ? BitConverter.DoubleToInt64Bits(secondOp[0]) != BitConverter.DoubleToInt64Bits(result[0]) : BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "((BitConverter.DoubleToInt64Bits(thirdOp[i]) >> 63) & 1) == 1 ? BitConverter.DoubleToInt64Bits(secondOp[i]) != BitConverter.DoubleToInt64Bits(result[i]) : BitConverter.DoubleToInt64Bits(firstOp[i]) != BitConverter.DoubleToInt64Bits(result[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "(float)(((i & 1) == 0) ? -0.0 : 1.0)", ["ValidateFirstResult"] = "((BitConverter.SingleToInt32Bits(thirdOp[0]) >> 31) & 1) == 1 ? BitConverter.SingleToInt32Bits(secondOp[0]) != BitConverter.SingleToInt32Bits(result[0]) : BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((BitConverter.SingleToInt32Bits(thirdOp[i]) >> 31) & 1) == 1 ? BitConverter.SingleToInt32Bits(secondOp[i]) != BitConverter.SingleToInt32Bits(result[i]) : BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "(double)(((i & 1) == 0) ? -0.0 : 1.0)", ["ValidateFirstResult"] = "((BitConverter.DoubleToInt64Bits(thirdOp[0]) >> 63) & 1) == 1 ? BitConverter.DoubleToInt64Bits(secondOp[0]) != BitConverter.DoubleToInt64Bits(result[0]) : BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "((BitConverter.DoubleToInt64Bits(thirdOp[i]) >> 63) & 1) == 1 ? BitConverter.DoubleToInt64Bits(secondOp[i]) != BitConverter.DoubleToInt64Bits(result[i]) : BitConverter.DoubleToInt64Bits(firstOp[i]) != BitConverter.DoubleToInt64Bits(result[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "(float)(((i & 1) == 0) ? -0.0 : 1.0)", ["ValidateFirstResult"] = "((BitConverter.SingleToInt32Bits(thirdOp[0]) >> 31) & 1) == 1 ? BitConverter.SingleToInt32Bits(secondOp[0]) != BitConverter.SingleToInt32Bits(result[0]) : BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((BitConverter.SingleToInt32Bits(thirdOp[i]) >> 31) & 1) == 1 ? BitConverter.SingleToInt32Bits(secondOp[i]) != BitConverter.SingleToInt32Bits(result[i]) : BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i])"}), ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"]="Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[i])"}), ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"]="Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[i])"}), ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"]="Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[i])"}), @@ -679,9 +727,9 @@ ("SimpleUnOpConvTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "ConvertToVector256Single", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((float)firstOp[0])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((float)firstOp[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "Divide", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(left[0] / right[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(left[i] / right[i]) != BitConverter.DoubleToInt64Bits(result[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "Divide", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(left[0] / right[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(left[i] / right[i]) != BitConverter.SingleToInt32Bits(result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "DuplicateEvenIndexed", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? (BitConverter.DoubleToInt64Bits(firstOp[i]) != BitConverter.DoubleToInt64Bits(result[i])) : (BitConverter.DoubleToInt64Bits(firstOp[i - 1]) != BitConverter.DoubleToInt64Bits(result[i]))"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "DuplicateEvenIndexed", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? (BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i])) : (BitConverter.SingleToInt32Bits(firstOp[i - 1]) != BitConverter.SingleToInt32Bits(result[i]))"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "DuplicateOddIndexed", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[1]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? (BitConverter.SingleToInt32Bits(firstOp[i + 1]) != BitConverter.SingleToInt32Bits(result[i])) : (BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i]))"}), + ("ComplexUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "DuplicateEvenIndexed", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? (BitConverter.DoubleToInt64Bits(firstOp[i]) != BitConverter.DoubleToInt64Bits(result[i])) : (BitConverter.DoubleToInt64Bits(firstOp[i - 1]) != BitConverter.DoubleToInt64Bits(result[i]))"}), + ("ComplexUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "DuplicateEvenIndexed", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? (BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i])) : (BitConverter.SingleToInt32Bits(firstOp[i - 1]) != BitConverter.SingleToInt32Bits(result[i]))"}), + ("ComplexUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "DuplicateOddIndexed", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[1]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? (BitConverter.SingleToInt32Bits(firstOp[i + 1]) != BitConverter.SingleToInt32Bits(result[i])) : (BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i]))"}), ("ImmUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "ExtractVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Imm"] = "1", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(firstOp[4])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i+4])"}), ("ImmUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "ExtractVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Imm"] = "1", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(firstOp[2])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(firstOp[i+2])"}), ("ImmUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "ExtractVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[16]", ["ValidateRemainingResults"] = "result[i] != firstOp[i+16]"}), @@ -740,8 +788,8 @@ ("ImmBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "Permute2x128", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt32", ["Imm"] = "2", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != right[0]", ["ValidateRemainingResults"] = "result[i] != (i < 4 ? right[i] : left[i-4])"}), ("ImmBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "Permute2x128", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64", ["Imm"] = "2", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "result[0] != right[0]", ["ValidateRemainingResults"] = "result[i] != (i < 2 ? right[i] : left[i-2])"}), ("ImmBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "Permute2x128", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt64", ["Imm"] = "2", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "result[0] != right[0]", ["ValidateRemainingResults"] = "result[i] != (i < 2 ? right[i] : left[i-2])"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "PermuteVar", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "1", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(left[1]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "i > 3 ? (BitConverter.SingleToInt32Bits(left[5]) != BitConverter.SingleToInt32Bits(result[i])) : (BitConverter.SingleToInt32Bits(left[1]) != BitConverter.SingleToInt32Bits(result[i]))"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "PermuteVar", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "(long)1", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(left[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "i > 1 ? (BitConverter.DoubleToInt64Bits(left[2]) != BitConverter.DoubleToInt64Bits(result[i])) : (BitConverter.DoubleToInt64Bits(left[0]) != BitConverter.DoubleToInt64Bits(result[i]))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "PermuteVar", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "1", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(left[1]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "i > 3 ? (BitConverter.SingleToInt32Bits(left[5]) != BitConverter.SingleToInt32Bits(result[i])) : (BitConverter.SingleToInt32Bits(left[1]) != BitConverter.SingleToInt32Bits(result[i]))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "PermuteVar", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "(long)1", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(left[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "i > 1 ? (BitConverter.DoubleToInt64Bits(left[2]) != BitConverter.DoubleToInt64Bits(result[i])) : (BitConverter.DoubleToInt64Bits(left[0]) != BitConverter.DoubleToInt64Bits(result[i]))"}), ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "RoundCurrentDirection", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(Math.Round(firstOp[0]))", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(Math.Round(firstOp[i]))"}), ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "RoundCurrentDirection", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(MathF.Round(firstOp[0]))", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(MathF.Round(firstOp[i]))"}), ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "RoundToNearestInteger", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(Math.Round(firstOp[0], MidpointRounding.AwayFromZero))", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(Math.Round(firstOp[i], MidpointRounding.AwayFromZero))"}), @@ -790,8 +838,8 @@ ("LoadBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskLoad", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(right[0]) < 0) ? left[0] : 0)", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(right[i]) < 0) ? left[i] : 0)"}), ("StoreBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskStore", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[0]) < 0) ? right[0] : BitConverter.DoubleToInt64Bits(result[0]))", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[i]) < 0) ? right[i] : BitConverter.DoubleToInt64Bits(result[i]))"}), ("StoreBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskStore", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[0]) < 0) ? right[0] : BitConverter.SingleToInt32Bits(result[0]))", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[i]) < 0) ? right[i] : BitConverter.SingleToInt32Bits(result[i]))"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "PermuteVar", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "1", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(left[1]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(left[1]) != BitConverter.SingleToInt32Bits(result[i])"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "PermuteVar", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "(long)1", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(left[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(left[0]) != BitConverter.DoubleToInt64Bits(result[i])"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "PermuteVar", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "1", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(left[1]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(left[1]) != BitConverter.SingleToInt32Bits(result[i])"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "PermuteVar", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "(long)1", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(left[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(left[0]) != BitConverter.DoubleToInt64Bits(result[i])"}), }; (string templateFileName, Dictionary templateData)[] Avx2Inputs = new [] @@ -865,34 +913,34 @@ ("ImmBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "Blend", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt32", ["Imm"] = "2", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((2 & (1 << 0)) == 0) ? left[0] : right[0])", ["ValidateRemainingResults"] = "result[i] != (((2 & (1 << i)) == 0) ? left[i] : right[i])"}), ("ImmBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "Blend", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt32", ["Imm"] = "4", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((4 & (1 << 0)) == 0) ? left[0] : right[0])", ["ValidateRemainingResults"] = "result[i] != (((4 & (1 << i)) == 0) ? left[i] : right[i])"}), ("ImmBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "Blend", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt32", ["Imm"] = "85", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((85 & (1 << 0)) == 0) ? left[0] : right[0])", ["ValidateRemainingResults"] = "result[i] != (((85 & (1 << i)) == 0) ? left[i] : right[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(byte)(((i & 1) == 0) ? 128 : 1)", ["ValidateFirstResult"] = "((thirdOp[0] >> 7) & 1) == 1 ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "((thirdOp[i] >> 7) & 1) == 1 ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(sbyte)(((i & 1) == 0) ? -128 : 1)", ["ValidateFirstResult"] = "((thirdOp[0] >> 7) & 1) == 1 ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "((thirdOp[i] >> 7) & 1) == 1 ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToInt16(\"0xFFFF\", 16) : (short)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToUInt16(\"0xFFFF\", 16) : (ushort)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToInt32(\"0xFFFFFFFF\", 16) : (int)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToUInt32(\"0xFFFFFFFF\", 16) : (uint)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToInt64(\"0xFFFFFFFFFFFFFFFF\", 16) : (long)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToUInt64(\"0xFFFFFFFFFFFFFFFF\", 16): (ulong)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[i]))"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[i]))"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[i]))"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[i]))"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(byte)(((i & 1) == 0) ? 128 : 1)", ["ValidateFirstResult"] = "((thirdOp[0] >> 7) & 1) == 1 ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "((thirdOp[i] >> 7) & 1) == 1 ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(sbyte)(((i & 1) == 0) ? -128 : 1)", ["ValidateFirstResult"] = "((thirdOp[0] >> 7) & 1) == 1 ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "((thirdOp[i] >> 7) & 1) == 1 ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToInt16(\"0xFFFF\", 16) : (short)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToUInt16(\"0xFFFF\", 16) : (ushort)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToInt32(\"0xFFFFFFFF\", 16) : (int)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToUInt32(\"0xFFFFFFFF\", 16) : (uint)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToInt64(\"0xFFFFFFFFFFFFFFFF\", 16) : (long)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector256", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "(((i & 1) == 0) ? Convert.ToUInt64(\"0xFFFFFFFFFFFFFFFF\", 16): (ulong)0)", ["ValidateFirstResult"] = "(thirdOp[0] != 0) ? secondOp[0] != result[0] : firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(thirdOp[i] != 0) ? secondOp[i] != result[i] : firstOp[i] != result[i]"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[i]))"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[i]))"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[i]))"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[i]))"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "CompareEqual", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != ((left[0] == right[0]) ? unchecked((byte)(-1)) : 0)", ["ValidateRemainingResults"] = "result[i] != ((left[i] == right[i]) ? unchecked((byte)(-1)) : 0)"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "CompareEqual", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != ((left[0] == right[0]) ? unchecked((short)(-1)) : 0)", ["ValidateRemainingResults"] = "result[i] != ((left[i] == right[i]) ? unchecked((short)(-1)) : 0)"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "CompareEqual", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != ((left[0] == right[0]) ? unchecked((int)(-1)) : 0)", ["ValidateRemainingResults"] = "result[i] != ((left[i] == right[i]) ? unchecked((int)(-1)) : 0)"}), @@ -961,10 +1009,10 @@ ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "Or", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "(ushort)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(ushort)(left[i] | right[i]) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "Or", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "(uint)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(uint)(left[i] | right[i]) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "Or", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "(ulong)(left[0] | right[0]) != result[0]", ["ValidateRemainingResults"] = "(ulong)(left[i] | right[i]) != result[i]"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "PackUnsignedSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != (ushort)Math.Clamp(left[0], ushort.MinValue, ushort.MaxValue)", ["ValidateRemainingResults"] = "result[i] != (i < 4 ? (ushort)Math.Clamp(left[i], ushort.MinValue, ushort.MaxValue) : (i < 8 && i > 3 ? (ushort)Math.Clamp(right[i&3], ushort.MinValue, ushort.MaxValue) : (i < 12 && i > 7 ? (ushort)Math.Clamp(left[i-4], ushort.MinValue, ushort.MaxValue) : (ushort)Math.Clamp(right[i-8], ushort.MinValue, ushort.MaxValue))))"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "PackUnsignedSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (byte)Math.Clamp(left[0], byte.MinValue, byte.MaxValue)", ["ValidateRemainingResults"] = "result[i] != (i < 8 ? (byte)Math.Clamp(left[i], byte.MinValue, byte.MaxValue) : (i < 16 && i > 7 ? (byte)Math.Clamp(right[i&7], byte.MinValue, byte.MaxValue) : (i < 24 && i > 15 ? (byte)Math.Clamp(left[i-8], byte.MinValue, byte.MaxValue) : (byte)Math.Clamp(right[i-16], byte.MinValue, byte.MaxValue))))"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "PackSignedSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != (short)Math.Clamp(left[0], short.MinValue, short.MaxValue)", ["ValidateRemainingResults"] = "result[i] != (i < 4 ? (short)Math.Clamp(left[i], short.MinValue, short.MaxValue) : (i < 8 && i > 3 ? (short)Math.Clamp(right[i&3], short.MinValue, short.MaxValue) : (i < 12 && i > 7 ? (short)Math.Clamp(left[i-4], short.MinValue, short.MaxValue) : (short)Math.Clamp(right[i-8], short.MinValue, short.MaxValue))))"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "PackSignedSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (sbyte)Math.Clamp(left[0], sbyte.MinValue, sbyte.MaxValue)", ["ValidateRemainingResults"] = "result[i] != (i < 8 ? (sbyte)Math.Clamp(left[i], sbyte.MinValue, sbyte.MaxValue) : (i < 16 && i > 7 ? (sbyte)Math.Clamp(right[i&7], sbyte.MinValue, sbyte.MaxValue) : (i < 24 && i > 15 ? (sbyte)Math.Clamp(left[i-8], sbyte.MinValue, sbyte.MaxValue) : (sbyte)Math.Clamp(right[i-16], sbyte.MinValue, sbyte.MaxValue))))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "PackUnsignedSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != (ushort)Math.Clamp(left[0], ushort.MinValue, ushort.MaxValue)", ["ValidateRemainingResults"] = "result[i] != (i < 4 ? (ushort)Math.Clamp(left[i], ushort.MinValue, ushort.MaxValue) : (i < 8 && i > 3 ? (ushort)Math.Clamp(right[i&3], ushort.MinValue, ushort.MaxValue) : (i < 12 && i > 7 ? (ushort)Math.Clamp(left[i-4], ushort.MinValue, ushort.MaxValue) : (ushort)Math.Clamp(right[i-8], ushort.MinValue, ushort.MaxValue))))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "PackUnsignedSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (byte)Math.Clamp(left[0], byte.MinValue, byte.MaxValue)", ["ValidateRemainingResults"] = "result[i] != (i < 8 ? (byte)Math.Clamp(left[i], byte.MinValue, byte.MaxValue) : (i < 16 && i > 7 ? (byte)Math.Clamp(right[i&7], byte.MinValue, byte.MaxValue) : (i < 24 && i > 15 ? (byte)Math.Clamp(left[i-8], byte.MinValue, byte.MaxValue) : (byte)Math.Clamp(right[i-16], byte.MinValue, byte.MaxValue))))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "PackSignedSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != (short)Math.Clamp(left[0], short.MinValue, short.MaxValue)", ["ValidateRemainingResults"] = "result[i] != (i < 4 ? (short)Math.Clamp(left[i], short.MinValue, short.MaxValue) : (i < 8 && i > 3 ? (short)Math.Clamp(right[i&3], short.MinValue, short.MaxValue) : (i < 12 && i > 7 ? (short)Math.Clamp(left[i-4], short.MinValue, short.MaxValue) : (short)Math.Clamp(right[i-8], short.MinValue, short.MaxValue))))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "PackSignedSaturate", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (sbyte)Math.Clamp(left[0], sbyte.MinValue, sbyte.MaxValue)", ["ValidateRemainingResults"] = "result[i] != (i < 8 ? (sbyte)Math.Clamp(left[i], sbyte.MinValue, sbyte.MaxValue) : (i < 16 && i > 7 ? (sbyte)Math.Clamp(right[i&7], sbyte.MinValue, sbyte.MaxValue) : (i < 24 && i > 15 ? (sbyte)Math.Clamp(left[i-8], sbyte.MinValue, sbyte.MaxValue) : (sbyte)Math.Clamp(right[i-16], sbyte.MinValue, sbyte.MaxValue))))"}), ("ImmBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "Permute2x128", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32", ["Imm"] = "2", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != right[0]", ["ValidateRemainingResults"] = "i > 3 ? (result[i] != left[i - 4]) : (result[i] != right[i])"}), ("ImmBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "Permute2x128", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt32", ["Imm"] = "2", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != right[0]", ["ValidateRemainingResults"] = "i > 3 ? (result[i] != left[i - 4]) : (result[i] != right[i])"}), ("ImmBinOpTest.template", new Dictionary { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx", ["Method"] = "Permute2x128", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64", ["Imm"] = "2", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "result[0] != right[0]", ["ValidateRemainingResults"] = "i > 1 ? (result[i] != left[i - 2]) : (result[i] != right[i])"}), @@ -1125,18 +1173,18 @@ ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "(ushort)(~left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(ushort)(~left[i] & right[i]) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "(uint)(~left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(uint)(~left[i] & right[i]) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "(ulong)(~left[0] & right[0]) != result[0]", ["ValidateRemainingResults"] = "(ulong)(~left[i] & right[i]) != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "(double)(((i & 1) == 0) ? -0.0 : 0.0)", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(double.IsNegative(thirdOp[0]) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(double.IsNegative(thirdOp[i]) ? secondOp[i] : firstOp[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "(int)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "(long)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "(float)(((i & 1) == 0) ? -0.0 : 0.0)", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(float.IsNegative(thirdOp[0]) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(float.IsNegative(thirdOp[i]) ? secondOp[i] : firstOp[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "(uint)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "(ulong)(((i & 1) == 0) ? -1: 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[i]))"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[i]))"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "(double)(((i & 1) == 0) ? -0.0 : 0.0)", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(double.IsNegative(thirdOp[0]) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(double.IsNegative(thirdOp[i]) ? secondOp[i] : firstOp[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int32", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp3"] = "(int)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int64", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp3"] = "(long)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "(float)(((i & 1) == 0) ? -0.0 : 0.0)", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(float.IsNegative(thirdOp[0]) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(float.IsNegative(thirdOp[i]) ? secondOp[i] : firstOp[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "(uint)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "(ulong)(((i & 1) == 0) ? -1: 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[i]))"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[i]))"}), ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "BroadcastVector128ToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "result[i] != firstOp[i & 3]"}), ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "BroadcastVector128ToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "result[i] != firstOp[i & 3]"}), ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["Method"] = "BroadcastVector128ToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i & 3])"}), @@ -1241,9 +1289,9 @@ ("SimpleUnOpConvTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "ConvertToVector512UInt32WithTruncation", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "result[0] != (uint)firstOp[0]", ["ValidateRemainingResults"] = "result[i] != (uint)firstOp[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Divide", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(left[0] / right[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(left[i] / right[i]) != BitConverter.DoubleToInt64Bits(result[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "Divide", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(left[0] / right[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(left[i] / right[i]) != BitConverter.SingleToInt32Bits(result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "DuplicateEvenIndexed", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? (BitConverter.DoubleToInt64Bits(firstOp[i]) != BitConverter.DoubleToInt64Bits(result[i])) : (BitConverter.DoubleToInt64Bits(firstOp[i - 1]) != BitConverter.DoubleToInt64Bits(result[i]))"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "DuplicateEvenIndexed", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? (BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i])) : (BitConverter.SingleToInt32Bits(firstOp[i - 1]) != BitConverter.SingleToInt32Bits(result[i]))"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "DuplicateOddIndexed", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[1]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? (BitConverter.SingleToInt32Bits(firstOp[i + 1]) != BitConverter.SingleToInt32Bits(result[i])) : (BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i]))"}), + ("ComplexUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "DuplicateEvenIndexed", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? (BitConverter.DoubleToInt64Bits(firstOp[i]) != BitConverter.DoubleToInt64Bits(result[i])) : (BitConverter.DoubleToInt64Bits(firstOp[i - 1]) != BitConverter.DoubleToInt64Bits(result[i]))"}), + ("ComplexUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "DuplicateEvenIndexed", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[0]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? (BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i])) : (BitConverter.SingleToInt32Bits(firstOp[i - 1]) != BitConverter.SingleToInt32Bits(result[i]))"}), + ("ComplexUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "DuplicateOddIndexed", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(firstOp[1]) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((i & 1) == 0) ? (BitConverter.SingleToInt32Bits(firstOp[i + 1]) != BitConverter.SingleToInt32Bits(result[i])) : (BitConverter.SingleToInt32Bits(firstOp[i]) != BitConverter.SingleToInt32Bits(result[i]))"}), ("ImmUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "ExtractVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Imm"] = "1", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(firstOp[4])", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i+4])"}), ("ImmUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "ExtractVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Imm"] = "1", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(firstOp[2])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(firstOp[i+2])"}), ("ImmUnOpTest.template", new Dictionary { ["Isa"] = "Avx512F", ["LoadIsa"] = "Avx512F", ["Method"] = "ExtractVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Imm"] = "1", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != firstOp[16]", ["ValidateRemainingResults"] = "result[i] != firstOp[i+16]"}), @@ -2085,14 +2133,14 @@ ("ImmBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "AlignRight", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Imm"] = "250", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != 0", ["ValidateRemainingResults"] = "result[i] != 0"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "Average", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "(byte)((left[0] + right[0] + 1) >> 1) != result[0]", ["ValidateRemainingResults"] = "(byte)((left[i] + right[i] + 1) >> 1) != result[i]"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "Average", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "(ushort)((left[0] + right[0] + 1) >> 1) != result[0]", ["ValidateRemainingResults"] = "(ushort)((left[i] + right[i] + 1) >> 1) != result[i]"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(byte)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "(short)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(sbyte)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), - ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "(ushort)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["NextValueOp3"] = "(byte)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "(short)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp3"] = "(sbyte)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), + ("ComplexTernOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BlendVariable", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp3"] = "(ushort)(((i & 1) == 0) ? -1 : 0)", ["ValidateFirstResult"] = "result[0] != ((thirdOp[0] != 0) ? secondOp[0] : firstOp[0])", ["ValidateRemainingResults"] = "result[i] != ((thirdOp[i] != 0) ? secondOp[i] : firstOp[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(firstOp[0] != result[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "CompareEqual", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Byte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Byte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetByte()", ["ValidateFirstResult"] = "result[0] != ((left[0] == right[0]) ? unchecked((byte)(-1)) : 0)", ["ValidateRemainingResults"] = "result[i] != ((left[i] == right[i]) ? unchecked((byte)(-1)) : 0)"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "CompareEqual", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != ((left[0] == right[0]) ? unchecked((short)(-1)) : 0)", ["ValidateRemainingResults"] = "result[i] != ((left[i] == right[i]) ? unchecked((short)(-1)) : 0)"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "CompareEqual", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateFirstResult"] = "result[0] != ((left[0] == right[0]) ? unchecked((sbyte)(-1)) : 0)", ["ValidateRemainingResults"] = "result[i] != ((left[i] == right[i]) ? unchecked((sbyte)(-1)) : 0)"}), @@ -2142,10 +2190,10 @@ ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyHighRoundScale", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (short)((((left[0] * right[0]) >> 14) + 1) >> 1)", ["ValidateRemainingResults"] = "result[i] != (short)((((left[i] * right[i]) >> 14) + 1) >> 1)"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyLow", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != BitConverter.ToInt16(BitConverter.GetBytes(((int)(left[0])) * right[0]), 0)", ["ValidateRemainingResults"] = "result[i] != BitConverter.ToInt16(BitConverter.GetBytes(((int)(left[i])) * right[i]), 0)"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "MultiplyLow", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != BitConverter.ToUInt16(BitConverter.GetBytes(((uint)(left[0])) * right[0]), 0)", ["ValidateRemainingResults"] = "result[i] != BitConverter.ToUInt16(BitConverter.GetBytes(((uint)(left[i])) * right[i]), 0)"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "PackSignedSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != (short)Math.Clamp(left[0], short.MinValue, short.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i & 7) < 4 ? (short)Math.Clamp(left[i - ((i / 8) * 4)], short.MinValue, short.MaxValue) : (short)Math.Clamp(right[i - 4 - ((i / 8) * 4)], short.MinValue, short.MaxValue))"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "PackSignedSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (sbyte)Math.Clamp(left[0], sbyte.MinValue, sbyte.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i & 15) < 8 ? (sbyte)Math.Clamp(left[i- ((i / 16) * 8)], sbyte.MinValue, sbyte.MaxValue) : (sbyte)Math.Clamp(right[i - 8 - ((i / 16) * 8)], sbyte.MinValue, sbyte.MaxValue))"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "PackUnsignedSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (byte)Math.Clamp(left[0], byte.MinValue, byte.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i & 15) < 8 ? (byte)Math.Clamp(left[i- ((i / 16) * 8)], byte.MinValue, byte.MaxValue) : (byte)Math.Clamp(right[i - 8 - ((i / 16) * 8)], byte.MinValue, byte.MaxValue))"}), - ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "PackUnsignedSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != (ushort)Math.Clamp(left[0], ushort.MinValue, ushort.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i & 7) < 4 ? (ushort)Math.Clamp(left[i- ((i / 8) * 4)], ushort.MinValue, ushort.MaxValue) : (ushort)Math.Clamp(right[i - 4 - ((i / 8) * 4)], ushort.MinValue, ushort.MaxValue))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "PackSignedSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != (short)Math.Clamp(left[0], short.MinValue, short.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i & 7) < 4 ? (short)Math.Clamp(left[i - ((i / 8) * 4)], short.MinValue, short.MaxValue) : (short)Math.Clamp(right[i - 4 - ((i / 8) * 4)], short.MinValue, short.MaxValue))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "PackSignedSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (sbyte)Math.Clamp(left[0], sbyte.MinValue, sbyte.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i & 15) < 8 ? (sbyte)Math.Clamp(left[i- ((i / 16) * 8)], sbyte.MinValue, sbyte.MaxValue) : (sbyte)Math.Clamp(right[i - 8 - ((i / 16) * 8)], sbyte.MinValue, sbyte.MaxValue))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "PackUnsignedSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Byte", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (byte)Math.Clamp(left[0], byte.MinValue, byte.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i & 15) < 8 ? (byte)Math.Clamp(left[i- ((i / 16) * 8)], byte.MinValue, byte.MaxValue) : (byte)Math.Clamp(right[i - 8 - ((i / 16) * 8)], byte.MinValue, byte.MaxValue))"}), + ("ComplexBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "PackUnsignedSaturate", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "result[0] != (ushort)Math.Clamp(left[0], ushort.MinValue, ushort.MaxValue)", ["ValidateRemainingResults"] = "result[i] != ((i & 7) < 4 ? (ushort)Math.Clamp(left[i- ((i / 8) * 4)], ushort.MinValue, ushort.MaxValue) : (ushort)Math.Clamp(right[i - 4 - ((i / 8) * 4)], ushort.MinValue, ushort.MaxValue))"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "PermuteVar32x16", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != left[(right[0] & 31)]", ["ValidateRemainingResults"] = "result[i] != left[(right[i] & 31)]"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "PermuteVar32x16", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "UInt16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt16()", ["ValidateFirstResult"] = "result[0] != left[(right[0] & 31)]", ["ValidateRemainingResults"] = "result[i] != left[(right[i] & 31)]"}), ("SimpleTernOpTest.template", new Dictionary { ["Isa"] = "Avx512BW", ["LoadIsa"] = "Avx512F", ["Method"] = "PermuteVar32x16x2", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Int16", ["Op3VectorType"] = "Vector512", ["Op3BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["NextValueOp3"] = "TestLibrary.Generator.GetInt16()", ["ValidateFirstResult"] = "result[0] != (((secondOp[0] & 63) <= 31) ? firstOp[secondOp[0] & 31] : thirdOp[secondOp[0] & 31])", ["ValidateRemainingResults"] = "result[i] != (((secondOp[i] & 63) <= 31) ? firstOp[secondOp[i] & 31] : thirdOp[secondOp[i] & 31])"}), @@ -2305,9 +2353,9 @@ ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ", ["LoadIsa"] = "Avx512F", ["Method"] = "And", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "(BitConverter.SingleToInt32Bits(left[0]) & BitConverter.SingleToInt32Bits(right[0])) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(left[i]) & BitConverter.SingleToInt32Bits(right[i])) != BitConverter.SingleToInt32Bits(result[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "((~BitConverter.DoubleToInt64Bits(left[0])) & BitConverter.DoubleToInt64Bits(right[0])) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "((~BitConverter.DoubleToInt64Bits(left[i])) & BitConverter.DoubleToInt64Bits(right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}), ("SimpleBinOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ", ["LoadIsa"] = "Avx512F", ["Method"] = "AndNot", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector512", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector512", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "((~BitConverter.SingleToInt32Bits(left[0])) & BitConverter.SingleToInt32Bits(right[0])) != BitConverter.SingleToInt32Bits(result[0])", ["ValidateRemainingResults"] = "((~BitConverter.SingleToInt32Bits(left[i])) & BitConverter.SingleToInt32Bits(right[i])) != BitConverter.SingleToInt32Bits(result[i])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastPairScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(result[i] != firstOp[i & 1])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastPairScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(result[i] != firstOp[i & 1])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastPairScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i & 1]))"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512DQ", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastPairScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(result[i] != firstOp[i & 1])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512DQ", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastPairScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(result[i] != firstOp[i & 1])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512DQ", ["LoadIsa"] = "Avx512F", ["Method"] = "BroadcastPairScalarToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i & 1]))"}), ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ", ["Method"] = "BroadcastVector128ToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "result[i] != firstOp[i & 1]"}), ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ", ["Method"] = "BroadcastVector128ToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "result[i] != firstOp[i & 1]"}), ("LoadUnOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ", ["Method"] = "BroadcastVector128ToVector512", ["RetVectorType"] = "Vector512", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(firstOp[i & 1])"}), @@ -2396,8 +2444,8 @@ (string templateFileName, Dictionary templateData)[] Avx512DQ_VL_Vector128Inputs = new [] { - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastPairScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(result[i] != firstOp[i & 1])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastPairScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(result[i] != firstOp[i & 1])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastPairScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(result[i] != firstOp[i & 1])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Sse2", ["Method"] = "BroadcastPairScalarToVector128", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(result[i] != firstOp[i & 1])"}), ("SimpleUnOpConvTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Sse2", ["Method"] = "ConvertToVector128Double", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(firstOp[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(firstOp[i])"}), ("SimpleUnOpConvTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Sse2", ["Method"] = "ConvertToVector128Double", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(TestLibrary.Generator.GetUInt64() & 0x7FFF_FFFF_FFFF_FFFFUL)", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(firstOp[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(firstOp[i])"}), ("SimpleUnOpConvTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Sse2", ["Method"] = "ConvertToVector128Int64", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "result[0] != (long)double.Round(firstOp[0])", ["ValidateRemainingResults"] = "result[i] != (long)double.Round(firstOp[i])"}), @@ -2424,9 +2472,9 @@ (string templateFileName, Dictionary templateData)[] Avx512DQ_VL_Vector256Inputs = new [] { - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Avx", ["Method"] = "BroadcastPairScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(result[i] != firstOp[i & 1])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Avx", ["Method"] = "BroadcastPairScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(result[i] != firstOp[i & 1])"}), - ("SimpleUnOpTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Avx", ["Method"] = "BroadcastPairScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i & 1]))"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Avx", ["Method"] = "BroadcastPairScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(result[i] != firstOp[i & 1])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Avx", ["Method"] = "BroadcastPairScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "firstOp[0] != result[0]", ["ValidateRemainingResults"] = "(result[i] != firstOp[i & 1])"}), + ("BroadcastTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Avx", ["Method"] = "BroadcastPairScalarToVector256", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(firstOp[0]) != BitConverter.DoubleToInt64Bits(result[0])", ["ValidateRemainingResults"] = "(BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i & 1]))"}), ("SimpleUnOpConvTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Avx", ["Method"] = "ConvertToVector256Double", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(firstOp[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(firstOp[i])"}), ("SimpleUnOpConvTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Avx", ["Method"] = "ConvertToVector256Double", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "(TestLibrary.Generator.GetUInt64() & 0x7FFF_FFFF_FFFF_FFFFUL)", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(firstOp[0])", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(firstOp[i])"}), ("SimpleUnOpConvTest.template", new Dictionary { ["Isa"] = "Avx512DQ.VL", ["LoadIsa"] = "Avx", ["Method"] = "ConvertToVector256Int64", ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "result[0] != (long)double.Round(firstOp[0])", ["ValidateRemainingResults"] = "result[i] != (long)double.Round(firstOp[i])"}), @@ -2680,12 +2728,18 @@ { ["SseVerify"] = @"..\..\X86\Shared\SseVerify.cs", ["Avx512Verify"] = @"..\..\X86\Shared\Avx512Verify.cs", + ["AlternatingBinaryOpTest"] = @"..\..\X86\Shared\SimpleBinOpTest_DataTable.cs", + ["AlternatingTernaryOpTest"] = @"..\..\X86\Shared\SimpleTernOpTest_DataTable.cs", + ["ComplexBinaryOpTest"] = @"..\..\X86\Shared\SimpleBinOpTest_DataTable.cs", + ["ComplexTernaryOpTest"] = @"..\..\X86\Shared\SimpleTernOpTest_DataTable.cs", + ["ComplexUnaryOpTest"] = @"..\..\X86\Shared\SimpleUnOpTest_DataTable.cs", + ["HorizontalBinaryOpTest"] = @"..\..\X86\Shared\SimpleBinOpTest_DataTable.cs", ["ScalarSimdUnaryOpTest"] = @"..\..\X86\Shared\ScalarSimdUnOpTest_DataTable.cs", ["SimdScalarUnaryOpTest"] = @"..\..\X86\Shared\SimdScalarUnOpTest_DataTable.cs", - ["SimpleTernaryOpTest"] = @"..\..\X86\Shared\SimpleTernOpTest_DataTable.cs", + ["SimpleBinaryOpConvTest"] = @"..\..\X86\Shared\SimpleBinOpConvTest_DataTable.cs", ["SimpleBinaryOpTest"] = @"..\..\X86\Shared\SimpleBinOpTest_DataTable.cs", + ["SimpleTernaryOpTest"] = @"..\..\X86\Shared\SimpleTernOpTest_DataTable.cs", ["SimpleUnaryOpTest"] = @"..\..\X86\Shared\SimpleUnOpTest_DataTable.cs", - ["SimpleBinaryOpConvTest"] = @"..\..\X86\Shared\SimpleBinOpConvTest_DataTable.cs", }; bool isImmTemplate(string name) diff --git a/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorBinaryOpTest.template b/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorBinaryOpTest.template index 54b276bdca1a59..dd919c86c7e923 100644 --- a/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorBinaryOpTest.template +++ b/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorBinaryOpTest.template @@ -29,6 +29,18 @@ namespace JIT.HardwareIntrinsics.General // Validates calling via reflection works, using Unsafe.Read test.RunReflectionScenario_UnsafeRead(); + // Validates broadcast functionality works + test.RunBroadcastScenario(); + + // Validates masking with a value functionality works + test.RunMaskingValueScenario(); + + // Validates masking with zero functionality works + test.RunMaskingZeroScenario(); + + // Validates broadcast + masking functionality works + test.RunBroadcastAndMaskingScenario(); + // Validates passing a static member works test.RunClsVarScenario(); @@ -220,6 +232,77 @@ namespace JIT.HardwareIntrinsics.General ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); } + public void RunBroadcastScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastScenario)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + {Op2VectorType}.Create<{Op2BaseType}>(Unsafe.Read<{Op2BaseType}>(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr, isBroadcast: true); + } + + public void RunMaskingValueScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingValueScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + op1, + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr) + ), + {RetVectorType}.Create<{RetBaseType}>(1) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr, mask: 1); + } + + + public void RunMaskingZeroScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingZeroScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + op1, + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr) + ), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr, mask: 0); + } + + public void RunBroadcastAndMaskingScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastAndMaskingScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + op1, + {Op2VectorType}.Create<{Op2BaseType}>(Unsafe.Read<{Op2BaseType}>(_dataTable.inArray2Ptr)) + ), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr, isBroadcast: true, mask: 0); + } + public void RunClsVarScenario() { TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); @@ -285,7 +368,7 @@ namespace JIT.HardwareIntrinsics.General test.RunStructFldScenario(this); } - private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, void* result, [CallerMemberName] string method = "") + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; @@ -295,10 +378,10 @@ namespace JIT.HardwareIntrinsics.General Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), op2); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, inArray2, outArray, method); + ValidateResult(inArray1, inArray2, outArray, isBroadcast, mask, method); } - private void ValidateResult(void* op1, void* op2, void* result, [CallerMemberName] string method = "") + private void ValidateResult(void* op1, void* op2, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; @@ -308,14 +391,19 @@ namespace JIT.HardwareIntrinsics.General Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, inArray2, outArray, method); + ValidateResult(inArray1, inArray2, outArray, isBroadcast, mask, method); } - private void ValidateResult({Op1BaseType}[] left, {Op2BaseType}[] right, {RetBaseType}[] result, [CallerMemberName] string method = "") + private void ValidateResult({Op1BaseType}[] left, {Op2BaseType}[] right, {RetBaseType}[] result, bool isBroadcast, {RetBaseType}? mask, [CallerMemberName] string method = "") { bool succeeded = true; - if ({ValidateFirstResult}) + if (isBroadcast) + { + right.AsSpan().Fill(right[0]); + } + + if ({ValidateFirstResult} && (!mask.HasValue || ((left[0] != default) && result[0] != mask.Value))) { succeeded = false; } @@ -323,7 +411,7 @@ namespace JIT.HardwareIntrinsics.General { for (var i = 1; i < RetElementCount; i++) { - if ({ValidateRemainingResults}) + if ({ValidateRemainingResults} && (!mask.HasValue || ((left[i] != default) && result[i] != mask.Value))) { succeeded = false; break; diff --git a/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorBinaryOperatorTest.template b/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorBinaryOperatorTest.template index 4d953387db0fe6..8d2f62ed861f48 100644 --- a/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorBinaryOperatorTest.template +++ b/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorBinaryOperatorTest.template @@ -29,6 +29,18 @@ namespace JIT.HardwareIntrinsics.General // Validates calling via reflection works, using Unsafe.Read test.RunReflectionScenario_UnsafeRead(); + // Validates broadcast functionality works + test.RunBroadcastScenario(); + + // Validates masking with a value functionality works + test.RunMaskingValueScenario(); + + // Validates masking with zero functionality works + test.RunMaskingZeroScenario(); + + // Validates broadcast + masking functionality works + test.RunBroadcastAndMaskingScenario(); + // Validates passing a static member works test.RunClsVarScenario(); @@ -200,6 +212,64 @@ namespace JIT.HardwareIntrinsics.General ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); } + public void RunBroadcastScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastScenario)); + + var result = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr) {Opcode} {Op2VectorType}.Create<{Op2BaseType}>(Unsafe.Read<{Op2BaseType}>(_dataTable.inArray2Ptr)); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr, isBroadcast: true); + } + + public void RunMaskingValueScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingValueScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + op1 {Opcode} Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + {RetVectorType}.Create<{RetBaseType}>(1) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr, mask: 1); + } + + public void RunMaskingZeroScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingZeroScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + op1 {Opcode} Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr, mask: 0); + } + + public void RunBroadcastAndMaskingScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastAndMaskingScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + op1 {Opcode} {Op2VectorType}.Create<{Op2BaseType}>(Unsafe.Read<{Op2BaseType}>(_dataTable.inArray2Ptr)), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr, isBroadcast: true, mask: 0); + } + public void RunClsVarScenario() { TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); @@ -262,7 +332,7 @@ namespace JIT.HardwareIntrinsics.General test.RunStructFldScenario(this); } - private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, void* result, [CallerMemberName] string method = "") + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; @@ -272,10 +342,10 @@ namespace JIT.HardwareIntrinsics.General Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), op2); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, inArray2, outArray, method); + ValidateResult(inArray1, inArray2, outArray, isBroadcast, mask, method); } - private void ValidateResult(void* op1, void* op2, void* result, [CallerMemberName] string method = "") + private void ValidateResult(void* op1, void* op2, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; @@ -285,14 +355,19 @@ namespace JIT.HardwareIntrinsics.General Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, inArray2, outArray, method); + ValidateResult(inArray1, inArray2, outArray, isBroadcast, mask, method); } - private void ValidateResult({Op1BaseType}[] left, {Op2BaseType}[] right, {RetBaseType}[] result, [CallerMemberName] string method = "") + private void ValidateResult({Op1BaseType}[] left, {Op2BaseType}[] right, {RetBaseType}[] result, bool isBroadcast, {RetBaseType}? mask, [CallerMemberName] string method = "") { bool succeeded = true; - if ({ValidateFirstResult}) + if (isBroadcast) + { + right.AsSpan().Fill(right[0]); + } + + if ({ValidateFirstResult} && (!mask.HasValue || ((left[0] != default) && result[0] != mask.Value))) { succeeded = false; } @@ -300,7 +375,7 @@ namespace JIT.HardwareIntrinsics.General { for (var i = 1; i < RetElementCount; i++) { - if ({ValidateRemainingResults}) + if ({ValidateRemainingResults} && (!mask.HasValue || ((left[i] != default) && result[i] != mask.Value))) { succeeded = false; break; diff --git a/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorTernaryOpTest.template b/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorTernaryOpTest.template index dd51391b96ddd9..7018b5ab67c7ed 100644 --- a/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorTernaryOpTest.template +++ b/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorTernaryOpTest.template @@ -29,6 +29,18 @@ namespace JIT.HardwareIntrinsics.General // Validates calling via reflection works, using Unsafe.Read test.RunReflectionScenario_UnsafeRead(); + // Validates broadcast functionality works + test.RunBroadcastScenario(); + + // Validates masking with a value functionality works + test.RunMaskingValueScenario(); + + // Validates masking with zero functionality works + test.RunMaskingZeroScenario(); + + // Validates broadcast + masking functionality + test.RunBroadcastAndMaskingScenario(); + // Validates passing a static member works test.RunClsVarScenario(); @@ -244,6 +256,80 @@ namespace JIT.HardwareIntrinsics.General ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); } + public void RunBroadcastScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastScenario)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + {Op3VectorType}.Create<{Op3BaseType}>(Unsafe.Read<{Op3BaseType}>(_dataTable.inArray3Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr, isBroadcast: true); + } + + public void RunMaskingValueScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingValueScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + op1, + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr) + ), + {RetVectorType}.Create<{RetBaseType}>(1) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr, mask: 1); + } + + public void RunMaskingZeroScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingZeroScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + op1, + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray3Ptr) + ), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr, mask: 0); + } + + public void RunBroadcastAndMaskingScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastAndMaskingScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + op1, + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + {Op3VectorType}.Create<{Op3BaseType}>(Unsafe.Read<{Op3BaseType}>(_dataTable.inArray3Ptr)) + ), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr, isBroadcast: true, mask: 0); + } + public void RunClsVarScenario() { TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); @@ -311,7 +397,7 @@ namespace JIT.HardwareIntrinsics.General test.RunStructFldScenario(this); } - private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, {Op3VectorType}<{Op3BaseType}> op3, void* result, [CallerMemberName] string method = "") + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, {Op3VectorType}<{Op3BaseType}> op3, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; @@ -323,10 +409,10 @@ namespace JIT.HardwareIntrinsics.General Unsafe.WriteUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), op3); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, inArray2, inArray3, outArray, method); + ValidateResult(inArray1, inArray2, inArray3, outArray, isBroadcast, mask, method); } - private void ValidateResult(void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") + private void ValidateResult(void* op1, void* op2, void* op3, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; @@ -338,14 +424,19 @@ namespace JIT.HardwareIntrinsics.General Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, inArray2, inArray3, outArray, method); + ValidateResult(inArray1, inArray2, inArray3, outArray, isBroadcast, mask, method); } - private void ValidateResult({Op1BaseType}[] firstOp, {Op2BaseType}[] secondOp, {Op3BaseType}[] thirdOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + private void ValidateResult({Op1BaseType}[] firstOp, {Op2BaseType}[] secondOp, {Op3BaseType}[] thirdOp, {RetBaseType}[] result, bool isBroadcast, {RetBaseType}? mask, [CallerMemberName] string method = "") { bool succeeded = true; - if ({ValidateFirstResult}) + if (isBroadcast) + { + thirdOp.AsSpan().Fill(thirdOp[0]); + } + + if ({ValidateFirstResult} && (!mask.HasValue || ((firstOp[0] != default) && result[0] != mask.Value))) { succeeded = false; } @@ -353,7 +444,7 @@ namespace JIT.HardwareIntrinsics.General { for (var i = 1; i < RetElementCount; i++) { - if ({ValidateRemainingResults}) + if ({ValidateRemainingResults} && (!mask.HasValue || ((firstOp[i] != default) && result[i] != mask.Value))) { succeeded = false; break; diff --git a/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorUnaryOpTest.template b/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorUnaryOpTest.template index eb80b1cbde78ed..15f73f76b59ebc 100644 --- a/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorUnaryOpTest.template +++ b/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorUnaryOpTest.template @@ -29,6 +29,18 @@ namespace JIT.HardwareIntrinsics.General // Validates calling via reflection works, using Unsafe.Read test.RunReflectionScenario_UnsafeRead(); + // Validates broadcast functionality works + test.RunBroadcastScenario(); + + // Validates masking with a value functionality works + test.RunMaskingValueScenario(); + + // Validates masking with zero functionality works + test.RunMaskingZeroScenario(); + + // Validates broadcast + masking functionality + test.RunBroadcastAndMaskingScenario(); + // Validates passing a static member works test.RunClsVarScenario(); @@ -196,6 +208,72 @@ namespace JIT.HardwareIntrinsics.General ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr); } + public void RunBroadcastScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastScenario)); + + var result = {Isa}.{Method}( + {Op1VectorType}.Create<{Op1BaseType}>(Unsafe.Read<{Op1BaseType}>(_dataTable.inArray1Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr, isBroadcast: true); + } + + public void RunMaskingValueScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingValueScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr) + ), + {RetVectorType}.Create<{RetBaseType}>(1) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr, mask: 1); + } + + public void RunMaskingZeroScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingZeroScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr) + ), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr, mask: 0); + } + + public void RunBroadcastAndMaskingScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastAndMaskingScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + {Op1VectorType}.Create<{Op1BaseType}>(Unsafe.Read<{Op1BaseType}>(_dataTable.inArray1Ptr)) + ), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr, isBroadcast: true, mask: 0); + } + public void RunClsVarScenario() { TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); @@ -259,7 +337,7 @@ namespace JIT.HardwareIntrinsics.General test.RunStructFldScenario(this); } - private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, void* result, [CallerMemberName] string method = "") + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; @@ -267,10 +345,10 @@ namespace JIT.HardwareIntrinsics.General Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, outArray, method); + ValidateResult(inArray1, outArray, isBroadcast, mask, method); } - private void ValidateResult(void* op1, void* result, [CallerMemberName] string method = "") + private void ValidateResult(void* op1, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; @@ -278,14 +356,19 @@ namespace JIT.HardwareIntrinsics.General Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, outArray, method); + ValidateResult(inArray1, outArray, isBroadcast, mask, method); } - private void ValidateResult({Op1BaseType}[] firstOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + private void ValidateResult({Op1BaseType}[] firstOp, {RetBaseType}[] result, bool isBroadcast, {RetBaseType}? mask, [CallerMemberName] string method = "") { bool succeeded = true; - if ({ValidateFirstResult}) + if (isBroadcast) + { + firstOp.AsSpan().Fill(firstOp[0]); + } + + if ({ValidateFirstResult} && (!mask.HasValue || ((firstOp[0] != default) && result[0] != mask.Value))) { succeeded = false; } @@ -293,7 +376,7 @@ namespace JIT.HardwareIntrinsics.General { for (var i = 1; i < RetElementCount; i++) { - if ({ValidateRemainingResults}) + if ({ValidateRemainingResults} && (!mask.HasValue || ((firstOp[i] != default) && result[i] != mask.Value))) { succeeded = false; break; diff --git a/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorUnaryOperatorTest.template b/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorUnaryOperatorTest.template index 833b83cb90a016..616c2eb2728fd4 100644 --- a/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorUnaryOperatorTest.template +++ b/src/tests/JIT/HardwareIntrinsics/General/Shared/VectorUnaryOperatorTest.template @@ -29,6 +29,18 @@ namespace JIT.HardwareIntrinsics.General // Validates calling via reflection works, using Unsafe.Read test.RunReflectionScenario_UnsafeRead(); + // Validates broadcast functionality works + test.RunBroadcastScenario(); + + // Validates masking with a value functionality works + test.RunMaskingValueScenario(); + + // Validates masking with zero functionality works + test.RunMaskingZeroScenario(); + + // Validates broadcast + masking functionality + test.RunBroadcastAndMaskingScenario(); + // Validates passing a static member works test.RunClsVarScenario(); @@ -179,6 +191,64 @@ namespace JIT.HardwareIntrinsics.General ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr); } + public void RunBroadcastScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastScenario)); + + var result = {Opcode}{Op1VectorType}.Create<{Op1BaseType}>(Unsafe.Read<{Op1BaseType}>(_dataTable.inArray1Ptr)); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr, isBroadcast: true); + } + + public void RunMaskingValueScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingValueScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + {Opcode}Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + {RetVectorType}.Create<{RetBaseType}>(1) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr, mask: 1); + } + + public void RunMaskingZeroScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingZeroScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + {Opcode}Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr, mask: 0); + } + + public void RunBroadcastAndMaskingScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastAndMaskingScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1, {RetVectorType}<{RetBaseType}>.Zero), + {Opcode}{Op1VectorType}.Create<{Op1BaseType}>(Unsafe.Read<{Op1BaseType}>(_dataTable.inArray1Ptr)), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.outArrayPtr, isBroadcast: true, mask: 0); + } + public void RunClsVarScenario() { TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario)); @@ -240,7 +310,7 @@ namespace JIT.HardwareIntrinsics.General test.RunStructFldScenario(this); } - private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, void* result, [CallerMemberName] string method = "") + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; @@ -248,10 +318,10 @@ namespace JIT.HardwareIntrinsics.General Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, outArray, method); + ValidateResult(inArray1, outArray, isBroadcast, mask, method); } - private void ValidateResult(void* op1, void* result, [CallerMemberName] string method = "") + private void ValidateResult(void* op1, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; @@ -259,14 +329,19 @@ namespace JIT.HardwareIntrinsics.General Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, outArray, method); + ValidateResult(inArray1, outArray, isBroadcast, mask, method); } - private void ValidateResult({Op1BaseType}[] firstOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + private void ValidateResult({Op1BaseType}[] firstOp, {RetBaseType}[] result, bool isBroadcast, {RetBaseType}? mask, [CallerMemberName] string method = "") { bool succeeded = true; - if ({ValidateFirstResult}) + if (isBroadcast) + { + firstOp.AsSpan().Fill(firstOp[0]); + } + + if ({ValidateFirstResult} && (!mask.HasValue || ((firstOp[0] != default) && result[0] != mask.Value))) { succeeded = false; } @@ -274,7 +349,7 @@ namespace JIT.HardwareIntrinsics.General { for (var i = 1; i < RetElementCount; i++) { - if ({ValidateRemainingResults}) + if ({ValidateRemainingResults} && (!mask.HasValue || ((firstOp[i] != default) && result[i] != mask.Value))) { succeeded = false; break; diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/BroadcastTest.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/BroadcastTest.template new file mode 100644 index 00000000000000..7c0bf494d44f44 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/BroadcastTest.template @@ -0,0 +1,283 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using Xunit; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + [Fact] + public static void {Method}{RetBaseType}() + { + var test = new BroadcastTest__{Method}{RetBaseType}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if ({LoadIsa}.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class BroadcastTest__{Method}{RetBaseType} + { + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(BroadcastTest__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}(_fld1); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + + private SimpleUnaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}> _dataTable; + + public BroadcastTest__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + _dataTable = new SimpleUnaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}>(_data1, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = {Isa}.{Method}( + {LoadIsa}.Load{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArrayPtr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = {Isa}.{Method}( + {LoadIsa}.LoadAligned{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArrayPtr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>) }) + .Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr); + var result = {Isa}.{Method}(op1); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, outArray, method); + } + + private void ValidateResult(void* op1, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] firstOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + if ({ValidateFirstResult}) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults}) + { + succeeded = false; + break; + } + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/ComplexBinOpTest.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/ComplexBinOpTest.template new file mode 100644 index 00000000000000..9993b82565d1d3 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/ComplexBinOpTest.template @@ -0,0 +1,302 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using Xunit; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + [Fact] + public static void {Method}{RetBaseType}() + { + var test = new ComplexBinaryOpTest__{Method}{RetBaseType}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if ({LoadIsa}.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class ComplexBinaryOpTest__{Method}{RetBaseType} + { + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op2VectorType}<{Op2BaseType}> _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(ComplexBinaryOpTest__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}(_fld1, _fld2); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data2 = new {Op2BaseType}[Op2ElementCount]; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op2VectorType}<{Op2BaseType}> _fld2; + + private SimpleBinaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}> _dataTable; + + public ComplexBinaryOpTest__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + _dataTable = new SimpleBinaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}>(_data1, _data2, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = {Isa}.{Method}( + {LoadIsa}.Load{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.Load{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = {Isa}.{Method}( + {LoadIsa}.LoadAligned{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.LoadAligned{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op2VectorType}<{Op2BaseType}>) }) + .Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr); + var result = {Isa}.{Method}(op1, op2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, op2, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1, _fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1, test._fld2); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, outArray, method); + } + + private void ValidateResult(void* op1, void* op2, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] left, {Op2BaseType}[] right, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + if ({ValidateFirstResult}) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults}) + { + succeeded = false; + break; + } + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op2VectorType}<{Op2BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})"); + TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/ComplexTernOpTest.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/ComplexTernOpTest.template new file mode 100644 index 00000000000000..8e8265dd3199c2 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/ComplexTernOpTest.template @@ -0,0 +1,321 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using Xunit; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + [Fact] + public static void {Method}{RetBaseType}() + { + var test = new ComplexTernaryOpTest__{Method}{RetBaseType}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if ({LoadIsa}.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class ComplexTernaryOpTest__{Method}{RetBaseType} + { + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + public {Op2VectorType}<{Op2BaseType}> _fld2; + public {Op2VectorType}<{Op3BaseType}> _fld3; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3VectorType}<{Op3BaseType}>, byte>(ref testStruct._fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(ComplexTernaryOpTest__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}(_fld1, _fld2, _fld3); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, _fld2, _fld3, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType}); + private static readonly int Op3ElementCount = Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>() / sizeof({Op3BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + private static {Op2BaseType}[] _data2 = new {Op2BaseType}[Op2ElementCount]; + private static {Op3BaseType}[] _data3 = new {Op3BaseType}[Op3ElementCount]; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + private {Op2VectorType}<{Op2BaseType}> _fld2; + private {Op3VectorType}<{Op3BaseType}> _fld3; + + private SimpleTernaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}, {Op3BaseType}> _dataTable; + + public ComplexTernaryOpTest__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3VectorType}<{Op3BaseType}>, byte>(ref _fld3), ref Unsafe.As<{Op3BaseType}, byte>(ref _data3[0]), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; } + for (var i = 0; i < Op3ElementCount; i++) { _data3[i] = {NextValueOp3}; } + _dataTable = new SimpleTernaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}, {Op3BaseType}>(_data1, _data2, _data3, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = {Isa}.{Method}( + {LoadIsa}.Load{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.Load{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr)), + {LoadIsa}.Load{Op3VectorType}(({Op3BaseType}*)(_dataTable.inArray3Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = {Isa}.{Method}( + {LoadIsa}.LoadAligned{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArray1Ptr)), + {LoadIsa}.LoadAligned{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr)), + {LoadIsa}.LoadAligned{Op3VectorType}(({Op3BaseType}*)(_dataTable.inArray3Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>), typeof({Op2VectorType}<{Op2BaseType}>), typeof({Op3VectorType}<{Op3BaseType}>) }) + .Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + var op2 = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr); + var op3 = Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr); + var result = {Isa}.{Method}(op1, op2, op3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, op2, op3, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1, _fld2, _fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _fld2, _fld3, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1, test._fld2, test._fld3); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, test._fld2, test._fld3, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, {Op3VectorType}<{Op3BaseType}> op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op3ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), op2); + Unsafe.WriteUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), op3); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult(void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; + {Op3BaseType}[] inArray3 = new {Op3BaseType}[Op3ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, inArray2, inArray3, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] firstOp, {Op2BaseType}[] secondOp, {Op3BaseType}[] thirdOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + if ({ValidateFirstResult}) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults}) + { + succeeded = false; + break; + } + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>, {Op2VectorType}<{Op2BaseType}>, {Op3VectorType}<{Op3BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($"secondOp: ({string.Join(", ", secondOp)})"); + TestLibrary.TestFramework.LogInformation($" thirdOp: ({string.Join(", ", thirdOp)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/ComplexUnOpTest.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/ComplexUnOpTest.template new file mode 100644 index 00000000000000..f15bbe7dec566a --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/ComplexUnOpTest.template @@ -0,0 +1,283 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. * + ******************************************************************************/ + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using Xunit; + +namespace JIT.HardwareIntrinsics.X86 +{ + public static partial class Program + { + [Fact] + public static void {Method}{RetBaseType}() + { + var test = new ComplexUnaryOpTest__{Method}{RetBaseType}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.Read + test.RunBasicScenario_UnsafeRead(); + + if ({LoadIsa}.IsSupported) + { + // Validates basic functionality works, using Load + test.RunBasicScenario_Load(); + + // Validates basic functionality works, using LoadAligned + test.RunBasicScenario_LoadAligned(); + } + + // Validates calling via reflection works, using Unsafe.Read + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.Read + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class ComplexUnaryOpTest__{Method}{RetBaseType} + { + private struct TestStruct + { + public {Op1VectorType}<{Op1BaseType}> _fld1; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + return testStruct; + } + + public void RunStructFldScenario(ComplexUnaryOpTest__{Method}{RetBaseType} testClass) + { + var result = {Isa}.{Method}(_fld1); + + Unsafe.Write(testClass._dataTable.outArrayPtr, result); + testClass.ValidateResult(_fld1, testClass._dataTable.outArrayPtr); + } + } + + private static readonly int LargestVectorSize = {LargestVectorSize}; + + private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType}); + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount]; + + private {Op1VectorType}<{Op1BaseType}> _fld1; + + private SimpleUnaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}> _dataTable; + + public ComplexUnaryOpTest__{Method}{RetBaseType}() + { + Succeeded = true; + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + + for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } + _dataTable = new SimpleUnaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}>(_data1, new {RetBaseType}[RetElementCount], LargestVectorSize); + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_Load() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); + + var result = {Isa}.{Method}( + {LoadIsa}.Load{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArrayPtr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr); + } + + public void RunBasicScenario_LoadAligned() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned)); + + var result = {Isa}.{Method}( + {LoadIsa}.LoadAligned{Op1VectorType}(({Op1BaseType}*)(_dataTable.inArrayPtr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1VectorType}<{Op1BaseType}>) }) + .Invoke(null, new object[] { + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr) + }); + + Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result)); + ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr); + var result = {Isa}.{Method}(op1); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(op1, _dataTable.outArrayPtr); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_fld1, _dataTable.outArrayPtr); + } + + public void RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(test._fld1, _dataTable.outArrayPtr); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, outArray, method); + } + + private void ValidateResult(void* op1, void* result, [CallerMemberName] string method = "") + { + {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; + {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; + + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); + Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); + + ValidateResult(inArray1, outArray, method); + } + + private void ValidateResult({Op1BaseType}[] firstOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + if ({ValidateFirstResult}) + { + succeeded = false; + } + else + { + for (var i = 1; i < RetElementCount; i++) + { + if ({ValidateRemainingResults}) + { + succeeded = false; + break; + } + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1VectorType}<{Op1BaseType}>): {method} failed:"); + TestLibrary.TestFramework.LogInformation($" firstOp: ({string.Join(", ", firstOp)})"); + TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BinaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BinaryOpTestTemplate.template index 2335fdc58658fb..b268a4c1d97630 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BinaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_BinaryOpTestTemplate.template @@ -41,6 +41,18 @@ namespace JIT.HardwareIntrinsics.X86 // Validates calling via reflection works, using Unsafe.Read test.RunReflectionScenario_UnsafeRead(); + // Validates broadcast functionality works + test.RunBroadcastScenario(); + + // Validates masking with a value functionality works + test.RunMaskingValueScenario(); + + // Validates masking with zero functionality works + test.RunMaskingZeroScenario(); + + // Validates broadcast + masking functionality + test.RunBroadcastAndMaskingScenario(); + // Validates passing a local works, using Unsafe.Read test.RunLclVarScenario_UnsafeRead(); @@ -179,6 +191,76 @@ namespace JIT.HardwareIntrinsics.X86 ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); } + public void RunBroadcastScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastScenario)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + {Op2VectorType}.Create<{Op2BaseType}>(Unsafe.Read<{Op2BaseType}>(_dataTable.inArray2Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr, isBroadcast: true); + } + + public void RunMaskingValueScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingValueScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1.As{RetBaseType}(), {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + op1, + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr) + ), + {RetVectorType}.Create<{RetBaseType}>(1) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr, mask: 1); + } + + public void RunMaskingZeroScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingZeroScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1.As{RetBaseType}(), {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + op1, + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr) + ), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr, mask: 0); + } + + public void RunBroadcastAndMaskingScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastAndMaskingScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1.As{RetBaseType}(), {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + op1, + {Op2VectorType}.Create<{Op2BaseType}>(Unsafe.Read<{Op2BaseType}>(_dataTable.inArray2Ptr)) + ), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr, isBroadcast: true, mask: 0); + } + public void RunLclVarScenario_UnsafeRead() { TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); @@ -241,7 +323,7 @@ namespace JIT.HardwareIntrinsics.X86 } } - private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, void* result, [CallerMemberName] string method = "") + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; @@ -251,10 +333,10 @@ namespace JIT.HardwareIntrinsics.X86 Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), op2); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, inArray2, outArray, method); + ValidateResult(inArray1, inArray2, outArray, isBroadcast, mask, method); } - private void ValidateResult(void* op1, void* op2, void* result, [CallerMemberName] string method = "") + private void ValidateResult(void* op1, void* op2, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; @@ -264,13 +346,18 @@ namespace JIT.HardwareIntrinsics.X86 Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef(op2), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, inArray2, outArray, method); + ValidateResult(inArray1, inArray2, outArray, isBroadcast, mask, method); } - private void ValidateResult({Op1BaseType}[] left, {Op2BaseType}[] right, {RetBaseType}[] result, [CallerMemberName] string method = "") + private void ValidateResult({Op1BaseType}[] left, {Op2BaseType}[] right, {RetBaseType}[] result, bool isBroadcast, {RetBaseType}? mask, [CallerMemberName] string method = "") { bool succeeded = true; + if (isBroadcast) + { + right.AsSpan().Fill(right[0]); + } + {TemplateValidationLogic} if (!succeeded) diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template index 5cd0ab652ea088..cd9caf6b7072b5 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_TernaryOpTestTemplate.template @@ -41,6 +41,18 @@ namespace JIT.HardwareIntrinsics.X86 // Validates calling via reflection works, using Unsafe.Read test.RunReflectionScenario_UnsafeRead(); + // Validates broadcast functionality works + test.RunBroadcastScenario(); + + // Validates masking with a value functionality works + test.RunMaskingValueScenario(); + + // Validates masking with zero functionality works + test.RunMaskingZeroScenario(); + + // Validates broadcast + masking functionality + test.RunBroadcastAndMaskingScenario(); + // Validates passing a local works, using Unsafe.Read test.RunLclVarScenario_UnsafeRead(); @@ -192,6 +204,80 @@ namespace JIT.HardwareIntrinsics.X86 ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); } + public void RunBroadcastScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastScenario)); + + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + {Op3VectorType}.Create<{Op3BaseType}>(Unsafe.Read<{Op3BaseType}>(_dataTable.inArray3Ptr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr, isBroadcast: true); + } + + public void RunMaskingValueScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingValueScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1.As{RetBaseType}(), {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + op1, + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr) + ), + {RetVectorType}.Create<{RetBaseType}>(1) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr, mask: 1); + } + + public void RunMaskingZeroScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingZeroScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1.As{RetBaseType}(), {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + op1, + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op3VectorType}<{Op3BaseType}>>(_dataTable.inArray3Ptr) + ), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr, mask: 0); + } + + public void RunBroadcastAndMaskingScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastAndMaskingScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1.As{RetBaseType}(), {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + op1, + Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr), + {Op3VectorType}.Create<{Op3BaseType}>(Unsafe.Read<{Op3BaseType}>(_dataTable.inArray3Ptr)) + ), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr, isBroadcast: true, mask: 0); + } + public void RunLclVarScenario_UnsafeRead() { TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); @@ -255,7 +341,7 @@ namespace JIT.HardwareIntrinsics.X86 } } - private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, {Op3VectorType}<{Op3BaseType}> op3, void* result, [CallerMemberName] string method = "") + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, {Op2VectorType}<{Op2BaseType}> op2, {Op3VectorType}<{Op3BaseType}> op3, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; @@ -267,10 +353,10 @@ namespace JIT.HardwareIntrinsics.X86 Unsafe.WriteUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), op3); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, inArray2, inArray3, outArray, method); + ValidateResult(inArray1, inArray2, inArray3, outArray, isBroadcast, mask, method); } - private void ValidateResult(void* op1, void* op2, void* op3, void* result, [CallerMemberName] string method = "") + private void ValidateResult(void* op1, void* op2, void* op3, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount]; @@ -282,13 +368,18 @@ namespace JIT.HardwareIntrinsics.X86 Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op3BaseType}, byte>(ref inArray3[0]), ref Unsafe.AsRef(op3), (uint)Unsafe.SizeOf<{Op3VectorType}<{Op3BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, inArray2, inArray3, outArray, method); + ValidateResult(inArray1, inArray2, inArray3, outArray, isBroadcast, mask, method); } - private void ValidateResult({Op1BaseType}[] firstOp, {Op2BaseType}[] secondOp, {Op3BaseType}[] thirdOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + private void ValidateResult({Op1BaseType}[] firstOp, {Op2BaseType}[] secondOp, {Op3BaseType}[] thirdOp, {RetBaseType}[] result, bool isBroadcast, {RetBaseType}? mask, [CallerMemberName] string method = "") { bool succeeded = true; + if (isBroadcast) + { + thirdOp.AsSpan().Fill(thirdOp[0]); + } + {TemplateValidationLogic} if (!succeeded) diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_UnaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_UnaryOpTestTemplate.template index 308bb33e72fdf3..04f0f5e05f9adb 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Shared/_UnaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/X86/Shared/_UnaryOpTestTemplate.template @@ -41,6 +41,18 @@ namespace JIT.HardwareIntrinsics.X86 // Validates calling via reflection works, using Unsafe.Read test.RunReflectionScenario_UnsafeRead(); + // Validates broadcast functionality works + test.RunBroadcastScenario(); + + // Validates masking with a value functionality works + test.RunMaskingValueScenario(); + + // Validates masking with zero functionality works + test.RunMaskingZeroScenario(); + + // Validates broadcast + masking functionality + test.RunBroadcastAndMaskingScenario(); + // Validates passing a local works, using Unsafe.Read test.RunLclVarScenario_UnsafeRead(); @@ -166,6 +178,73 @@ namespace JIT.HardwareIntrinsics.X86 ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr); } + public void RunBroadcastScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastScenario)); + + var result = {Isa}.{Method}( + {Op1VectorType}.Create<{Op1BaseType}>(Unsafe.Read<{Op1BaseType}>(_dataTable.inArrayPtr)) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr, isBroadcast: true); + } + + public void RunMaskingValueScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingValueScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1.As{RetBaseType}(), {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr) + ), + {RetVectorType}.Create<{RetBaseType}>(1) + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr, mask: 1); + } + + + public void RunMaskingZeroScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunMaskingZeroScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1.As{RetBaseType}(), {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr) + ), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr, mask: 0); + } + + public void RunBroadcastAndMaskingScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBroadcastAndMaskingScenario)); + + var op1 = Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArrayPtr); + + var result = {RetVectorType}.ConditionalSelect( + {RetVectorType}.Equals<{RetBaseType}>(op1.As{RetBaseType}(), {RetVectorType}<{RetBaseType}>.Zero), + {Isa}.{Method}( + {Op1VectorType}.Create<{Op1BaseType}>(Unsafe.Read<{Op1BaseType}>(_dataTable.inArrayPtr)) + ), + {RetVectorType}<{RetBaseType}>.Zero + ); + + Unsafe.Write(_dataTable.outArrayPtr, result); + ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr, isBroadcast: true, mask: 0); + } + public void RunLclVarScenario_UnsafeRead() { TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); @@ -227,7 +306,7 @@ namespace JIT.HardwareIntrinsics.X86 } } - private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, void* result, [CallerMemberName] string method = "") + private void ValidateResult({Op1VectorType}<{Op1BaseType}> op1, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; @@ -235,10 +314,10 @@ namespace JIT.HardwareIntrinsics.X86 Unsafe.WriteUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), op1); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, outArray, method); + ValidateResult(inArray1, outArray, isBroadcast, mask, method); } - private void ValidateResult(void* op1, void* result, [CallerMemberName] string method = "") + private void ValidateResult(void* op1, void* result, bool isBroadcast = false, {RetBaseType}? mask = null, [CallerMemberName] string method = "") { {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount]; {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount]; @@ -246,13 +325,18 @@ namespace JIT.HardwareIntrinsics.X86 Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef(op1), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>()); - ValidateResult(inArray1, outArray, method); + ValidateResult(inArray1, outArray, isBroadcast, mask, method); } - private void ValidateResult({Op1BaseType}[] firstOp, {RetBaseType}[] result, [CallerMemberName] string method = "") + private void ValidateResult({Op1BaseType}[] firstOp, {RetBaseType}[] result, bool isBroadcast, {RetBaseType}? mask, [CallerMemberName] string method = "") { bool succeeded = true; + if (isBroadcast) + { + firstOp.AsSpan().Fill(firstOp[0]); + } + {TemplateValidationLogic} if (!succeeded) diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Sse2/Sse2_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Sse2/Sse2_r.csproj index 6a121d1f87d598..dbe58f761ca48c 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Sse2/Sse2_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/X86/Sse2/Sse2_r.csproj @@ -2,6 +2,8 @@ X86_Sse2_r true + + true Embedded diff --git a/src/tests/JIT/HardwareIntrinsics/X86/Sse2/Sse2_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/Sse2/Sse2_ro.csproj index 9fa0b5774caeeb..0a8a0a8357b6ae 100644 --- a/src/tests/JIT/HardwareIntrinsics/X86/Sse2/Sse2_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/X86/Sse2/Sse2_ro.csproj @@ -2,6 +2,8 @@ X86_Sse2_ro true + + true Embedded