From 27a25bd37fa1fe6201c27709b1884e1b328f8beb Mon Sep 17 00:00:00 2001 From: Jan Kotas Date: Wed, 11 Oct 2017 13:04:43 -0700 Subject: [PATCH] Delete !FEATURE_IMPLICIT_TLS (#14398) Linux and Windows arm64 are using the regular C/C++ thread local statics. This change unifies the remaining Windows architectures to be on the same plan. --- clrdefinitions.cmake | 4 - src/ToolBox/SOS/Strike/strike.cpp | 28 +- src/debug/daccess/enummem.cpp | 5 - src/debug/daccess/request.cpp | 4 +- src/debug/di/rspriv.h | 3 +- src/debug/di/rsthread.cpp | 173 +----- src/debug/ee/rcthread.cpp | 5 - src/debug/inc/dbgipcevents.h | 1 - src/inc/clrconfigvalues.h | 1 - src/inc/dacvars.h | 7 +- src/inc/switches.h | 7 - src/inc/tls.h | 57 +- src/jit/ee_il_dll.cpp | 12 - src/utilcode/CMakeLists.txt | 13 - src/utilcode/tls.cpp | 271 --------- src/vm/CMakeLists.txt | 2 - src/vm/amd64/AsmMacros.inc | 30 +- src/vm/amd64/InstantiatingStub.asm | 8 +- src/vm/amd64/JitHelpers_Fast.asm | 59 +- .../amd64/JitHelpers_InlineGetAppDomain.asm | 123 ----- src/vm/amd64/JitHelpers_InlineGetThread.asm | 49 +- src/vm/amd64/JitHelpers_Slow.asm | 481 ---------------- src/vm/amd64/RedirectedHandledJITCase.asm | 3 +- src/vm/amd64/TlsGetters.asm | 120 ---- src/vm/amd64/UMThunkStub.asm | 8 +- src/vm/amd64/asmconstants.h | 17 +- src/vm/amd64/cgencpu.h | 19 +- src/vm/appdomain.cpp | 4 - src/vm/arm/asmconstants.h | 3 - src/vm/arm/asmhelpers.asm | 2 - src/vm/arm/cgencpu.h | 17 +- src/vm/arm/patchedcode.S | 16 - src/vm/arm/patchedcode.asm | 515 ------------------ src/vm/arm/stubs.cpp | 292 ++-------- src/vm/arm64/cgencpu.h | 10 +- src/vm/arm64/stubs.cpp | 39 -- src/vm/ceeload.cpp | 4 - src/vm/ceemain.cpp | 10 - src/vm/ceemain.h | 8 - src/vm/corhost.cpp | 48 -- src/vm/crossgencompile.cpp | 21 - src/vm/eedbginterface.h | 1 - src/vm/eedbginterfaceimpl.cpp | 11 +- src/vm/eedbginterfaceimpl.h | 1 - src/vm/i386/asmconstants.h | 3 - src/vm/i386/asmhelpers.asm | 63 --- src/vm/i386/cgencpu.h | 15 +- src/vm/i386/cgenx86.cpp | 2 +- src/vm/i386/jithelp.asm | 51 +- src/vm/i386/jitinterfacex86.cpp | 47 +- src/vm/i386/stublinkerx86.cpp | 229 +------- src/vm/i386/stublinkerx86.h | 8 +- src/vm/jitinterface.cpp | 13 - src/vm/jitinterfacegen.cpp | 183 +------ src/vm/threads.cpp | 356 +----------- src/vm/threads.h | 32 -- src/vm/threads.inl | 11 - src/vm/vars.cpp | 5 +- src/vm/vars.hpp | 4 +- 59 files changed, 177 insertions(+), 3357 deletions(-) delete mode 100644 src/utilcode/tls.cpp delete mode 100644 src/vm/amd64/JitHelpers_InlineGetAppDomain.asm delete mode 100644 src/vm/amd64/TlsGetters.asm diff --git a/clrdefinitions.cmake b/clrdefinitions.cmake index 18c93c377e09..12b074a80f58 100644 --- a/clrdefinitions.cmake +++ b/clrdefinitions.cmake @@ -142,10 +142,6 @@ add_definitions(-DFEATURE_ICASTABLE) if (WIN32 AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386)) add_definitions(-DFEATURE_INTEROP_DEBUGGING) endif (WIN32 AND (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_I386)) -if (CLR_CMAKE_PLATFORM_UNIX OR CLR_CMAKE_TARGET_ARCH_ARM64) - add_definitions(-DFEATURE_IMPLICIT_TLS) - set(FEATURE_IMPLICIT_TLS 1) -endif(CLR_CMAKE_PLATFORM_UNIX OR CLR_CMAKE_TARGET_ARCH_ARM64) if(FEATURE_INTERPRETER) add_definitions(-DFEATURE_INTERPRETER) endif(FEATURE_INTERPRETER) diff --git a/src/ToolBox/SOS/Strike/strike.cpp b/src/ToolBox/SOS/Strike/strike.cpp index 338e70dc9a5d..def0af31e4bf 100644 --- a/src/ToolBox/SOS/Strike/strike.cpp +++ b/src/ToolBox/SOS/Strike/strike.cpp @@ -5778,7 +5778,6 @@ HRESULT PrintSpecialThreads() TADDR CLRTLSDataAddr = 0; -#ifdef 
FEATURE_IMPLICIT_TLS TADDR tlsArrayAddr = NULL; if (!SafeReadMemory (TO_TADDR(cdaTeb) + WINNT_OFFSETOF__TEB__ThreadLocalStoragePointer , &tlsArrayAddr, sizeof (void**), NULL)) { @@ -5788,36 +5787,13 @@ HRESULT PrintSpecialThreads() TADDR moduleTlsDataAddr = 0; - if (!SafeReadMemory (tlsArrayAddr + sizeof (void*) * dwCLRTLSDataIndex, &moduleTlsDataAddr, sizeof (void**), NULL)) + if (!SafeReadMemory (tlsArrayAddr + sizeof (void*) * (dwCLRTLSDataIndex & 0xFFFF), &moduleTlsDataAddr, sizeof (void**), NULL)) { PrintLn("Failed to get Tls expansion slots for thread ", ThreadID(SysId)); continue; } - CLRTLSDataAddr = moduleTlsDataAddr + OFFSETOF__TLS__tls_EETlsData; -#else - if (dwCLRTLSDataIndex < TLS_MINIMUM_AVAILABLE) - { - CLRTLSDataAddr = TO_TADDR(cdaTeb) + offsetof(TEB, TlsSlots) + sizeof (void*) * dwCLRTLSDataIndex; - } - else - { - //if TLS index is bigger than TLS_MINIMUM_AVAILABLE, the TLS slot lives in ExpansionSlots - TADDR TebExpsionAddr = NULL; - if (!SafeReadMemory (TO_TADDR(cdaTeb) + offsetof(TEB, TlsExpansionSlots) , &TebExpsionAddr, sizeof (void**), NULL)) - { - PrintLn("Failed to get Tls expansion slots for thread ", ThreadID(SysId)); - continue; - } - - if (TebExpsionAddr == NULL) - { - continue; - } - - CLRTLSDataAddr = TebExpsionAddr + sizeof (void*) * (dwCLRTLSDataIndex - TLS_MINIMUM_AVAILABLE); - } -#endif // FEATURE_IMPLICIT_TLS + CLRTLSDataAddr = moduleTlsDataAddr + ((dwCLRTLSDataIndex & 0x7FFF0000) >> 16) + OFFSETOF__TLS__tls_EETlsData; TADDR CLRTLSData = NULL; if (!SafeReadMemory (CLRTLSDataAddr, &CLRTLSData, sizeof (TADDR), NULL)) diff --git a/src/debug/daccess/enummem.cpp b/src/debug/daccess/enummem.cpp index c1155d9e366d..007af0ba4bb0 100644 --- a/src/debug/daccess/enummem.cpp +++ b/src/debug/daccess/enummem.cpp @@ -256,11 +256,6 @@ HRESULT ClrDataAccess::EnumMemCLRStatic(IN CLRDataEnumMemoryFlags flags) // see synblk.cpp, the pointer is pointed to a static byte[] SyncBlockCache::s_pSyncBlockCache.EnumMem(); -#ifndef FEATURE_IMPLICIT_TLS - ReportMem(m_globalBase + g_dacGlobals.dac__gThreadTLSIndex, sizeof(DWORD)); - ReportMem(m_globalBase + g_dacGlobals.dac__gAppDomainTLSIndex, sizeof(DWORD)); -#endif - ReportMem(m_globalBase + g_dacGlobals.dac__g_FCDynamicallyAssignedImplementations, sizeof(TADDR)*ECall::NUM_DYNAMICALLY_ASSIGNED_FCALL_IMPLEMENTATIONS); diff --git a/src/debug/daccess/request.cpp b/src/debug/daccess/request.cpp index 08136f39e138..6d6e3589be1e 100644 --- a/src/debug/daccess/request.cpp +++ b/src/debug/daccess/request.cpp @@ -4041,14 +4041,14 @@ HRESULT ClrDataAccess::GetTLSIndex(ULONG *pIndex) return E_INVALIDARG; SOSDacEnter(); - if (CExecutionEngine::GetTlsIndex() == TLS_OUT_OF_INDEXES) + if (g_TlsIndex == TLS_OUT_OF_INDEXES) { *pIndex = 0; hr = S_FALSE; } else { - *pIndex = CExecutionEngine::GetTlsIndex(); + *pIndex = g_TlsIndex; } SOSDacLeave(); diff --git a/src/debug/di/rspriv.h b/src/debug/di/rspriv.h index e0489c53ad7f..de821f26a65d 100644 --- a/src/debug/di/rspriv.h +++ b/src/debug/di/rspriv.h @@ -10609,11 +10609,10 @@ class CordbUnmanagedThread : public CordbBase void CacheEEDebuggerWord(); HRESULT SetEEThreadValue(REMOTE_PTR EETlsValue); -#ifdef FEATURE_IMPLICIT_TLS + DWORD_PTR GetEEThreadValue(); REMOTE_PTR GetClrModuleTlsDataAddress(); REMOTE_PTR GetEETlsDataBlock(); -#endif public: HRESULT GetEEDebuggerWord(REMOTE_PTR *pValue); diff --git a/src/debug/di/rsthread.cpp b/src/debug/di/rsthread.cpp index aa85de83d87b..cd5e62932ae5 100644 --- a/src/debug/di/rsthread.cpp +++ b/src/debug/di/rsthread.cpp @@ -2996,12 +2996,7 @@ HRESULT 
CordbUnmanagedThread::RestoreLeafSeh() // return value == 0 (assumed default, *pRead = false REMOTE_PTR CordbUnmanagedThread::GetPreDefTlsSlot(SIZE_T slot, bool * pRead) { -#ifdef FEATURE_IMPLICIT_TLS REMOTE_PTR pBlock = (REMOTE_PTR) GetEETlsDataBlock(); -#else - DebuggerIPCRuntimeOffsets *pRO = &(GetProcess()->m_runtimeOffsets); - REMOTE_PTR pBlock = (REMOTE_PTR) GetTlsSlot(pRO->m_TLSIndexOfPredefs); -#endif REMOTE_PTR data = 0; @@ -3036,164 +3031,6 @@ REMOTE_PTR CordbUnmanagedThread::GetPreDefTlsSlot(SIZE_T slot, bool * pRead) return 0; } -#ifndef FEATURE_IMPLICIT_TLS - -// Read the contents from a LS threads's TLS slot. -DWORD_PTR CordbUnmanagedThread::GetTlsSlot(SIZE_T slot) -{ - DWORD_PTR ret = 0; - - // Compute the address of the necessary TLS value. - if (FAILED(LoadTLSArrayPtr())) - { - return NULL; - } - - - void * pBase = NULL; - SIZE_T slotAdjusted = slot; - - if (slot < TLS_MINIMUM_AVAILABLE) - { - pBase = m_pTLSArray; - } - else if (slot < TLS_MINIMUM_AVAILABLE + TLS_EXPANSION_SLOTS) - { - pBase = m_pTLSExtendedArray; - slotAdjusted -= TLS_MINIMUM_AVAILABLE; - - // Expansion slot is lazily allocated. If we're trying to read from it, but hasn't been allocated, - // then the TLS slot is still the default value, which is 0 (NULL). - if (pBase == NULL) - { - return NULL; - } - } - else - { - // Slot is out of range. Shouldn't happen unless debuggee is corrupted. - _ASSERTE(!"Invalid TLS slot"); - return NULL; - } - - void *pEEThreadTLS = (BYTE*) pBase + (slotAdjusted * sizeof(void*)); - - - // Read the thread's TLS value. - HRESULT hr = GetProcess()->SafeReadStruct(PTR_TO_CORDB_ADDRESS(pEEThreadTLS), &ret); - if (FAILED(hr)) - { - LOG((LF_CORDB, LL_INFO1000, "CUT::GEETV: failed to read TLS value: computed addr=0x%p index=%d, err=%x\n", - pEEThreadTLS, slot, hr)); - - return NULL; - } - - LOG((LF_CORDB, LL_INFO1000000, "CUT::GEETV: EE Thread TLS value is 0x%p for thread 0x%x, slot 0x%x\n", ret, m_id, slot)); - - return ret; -} - -// This does a WriteProcessMemory to write to the debuggee's TLS slot allotted to EEThread -// -// Arguments: -// EETlsValue - the value to write to the remote TLS slot. -// -// Notes: -// The TLS slot is m_TLSIndex. -// -// This is very brittle because the OS can lazily allocates storage for TLS slots. -// In order to gaurantee the storage is available, it must have been written to by the debuggee. -// For managed threads, that's easy because the Thread* is already written to the slot. -// But for pure native threads where GetThread() == NULL, the storage may not yet be allocated. -// -// The saving grace is that the debuggee's hijack filters will force the TLS to be allocated before it -// sends a flare. -// -// Therefore, this function can only be called: -// 1) on a managed thread -// 2) on a native thread after that thread has been hijacked and sent a flare. -// -// This is brittle reasoning, but so is the rest of interop-debugging. -// -HRESULT CordbUnmanagedThread::SetEEThreadValue(REMOTE_PTR EETlsValue) -{ - FAIL_IF_NEUTERED(this); - - // Compute the address of the necessary TLS value. - DebuggerIPCRuntimeOffsets *pRO = &(GetProcess()->m_runtimeOffsets); - - // Compute the address of the necessary TLS value. 
- HRESULT hr = LoadTLSArrayPtr(); - if (FAILED(hr)) - { - return hr; - } - - - DWORD slot = (DWORD) pRO->m_TLSIndex; - - void * pBase = NULL; - SIZE_T slotAdjusted = slot; - if (slot < TLS_MINIMUM_AVAILABLE) - { - pBase = m_pTLSArray; - } - else if (slot < TLS_MINIMUM_AVAILABLE+TLS_EXPANSION_SLOTS) - { - pBase = m_pTLSExtendedArray; - slotAdjusted -= TLS_MINIMUM_AVAILABLE; - - // Expansion slot is lazily allocated. If we're trying to read from it, but hasn't been allocated, - // then the TLS slot is still the default value, which is 0. - if (pBase == NULL) - { - // See reasoning in header for why this should succeed. - _ASSERTE(!"Can't set to expansion slots because they haven't been allocated"); - return E_FAIL; - } - } - else - { - // Slot is out of range. Shouldn't happen unless debuggee is corrupted. - _ASSERTE(!"Invalid TLS slot"); - return E_INVALIDARG; - } - - - void *pEEThreadTLS = (BYTE*) pBase + (slotAdjusted * sizeof(void*)); - - - // Write the thread's TLS value. - hr = GetProcess()->SafeWriteStruct(PTR_TO_CORDB_ADDRESS(pEEThreadTLS), &EETlsValue); - - if (FAILED(hr)) - { - LOG((LF_CORDB, LL_INFO1000, "CUT::SEETV: failed to set TLS value: " - "computed addr=0x%p index=%d, err=%x\n", - pEEThreadTLS, pRO->m_TLSIndex, hr)); - - return hr; - } - - LOG((LF_CORDB, LL_INFO1000000, - "CUT::SEETV: EE Thread TLS value is now 0x%p for thread 0x%x\n", - EETlsValue, m_id)); - - return S_OK; -} -#else // FEATURE_IMPLICIT_TLS - -#ifdef DBG_TARGET_X86 -#define WINNT_OFFSETOF__TEB__ThreadLocalStoragePointer 0x2c -#elif defined(DBG_TARGET_AMD64) -#define WINNT_OFFSETOF__TEB__ThreadLocalStoragePointer 0x58 -#elif defined(DBG_TARGET_ARM) -#define WINNT_OFFSETOF__TEB__ThreadLocalStoragePointer 0x2c -#elif defined(DBG_TARGET_ARM64) -#define WINNT_OFFSETOF__TEB__ThreadLocalStoragePointer 0x58 -#endif - // sets the value of gCurrentThreadInfo.m_pThread HRESULT CordbUnmanagedThread::SetEEThreadValue(REMOTE_PTR EETlsValue) { @@ -3275,7 +3112,7 @@ REMOTE_PTR CordbUnmanagedThread::GetClrModuleTlsDataAddress() DWORD slot = (DWORD)(GetProcess()->m_runtimeOffsets.m_TLSIndex); REMOTE_PTR clrModuleTlsDataAddr; - hr = GetProcess()->SafeReadStruct(PTR_TO_CORDB_ADDRESS((BYTE*)tlsArrayAddr + slot * sizeof(void*)), &clrModuleTlsDataAddr); + hr = GetProcess()->SafeReadStruct(PTR_TO_CORDB_ADDRESS((BYTE*)tlsArrayAddr + (slot & 0xFFFF) * sizeof(void*)), &clrModuleTlsDataAddr); if (FAILED(hr)) { return NULL; @@ -3287,7 +3124,7 @@ REMOTE_PTR CordbUnmanagedThread::GetClrModuleTlsDataAddress() return NULL; } - return clrModuleTlsDataAddr; + return (BYTE*) clrModuleTlsDataAddr + ((slot & 0x7FFF0000) >> 16); } // gets the value of gCurrentThreadInfo.m_EETlsData @@ -3312,8 +3149,6 @@ REMOTE_PTR CordbUnmanagedThread::GetEETlsDataBlock() return ret; } -#endif // FEATURE_IMPLICIT_TLS - /* * CacheEEDebuggerWord * @@ -3337,11 +3172,7 @@ void CordbUnmanagedThread::CacheEEDebuggerWord() { LOG((LF_CORDB, LL_INFO1000, "CacheEEDW: Entered\n")); -#ifdef FEATURE_IMPLICIT_TLS REMOTE_PTR value = (REMOTE_PTR)GetEEThreadValue(); -#else - REMOTE_PTR value = (REMOTE_PTR)GetTlsSlot(GetProcess()->m_runtimeOffsets.m_TLSIndex); -#endif if ((((DWORD)value) & 0x1) == 1) { diff --git a/src/debug/ee/rcthread.cpp b/src/debug/ee/rcthread.cpp index d4e707dd06cc..58547411c15a 100644 --- a/src/debug/ee/rcthread.cpp +++ b/src/debug/ee/rcthread.cpp @@ -763,7 +763,6 @@ HRESULT DebuggerRCThread::SetupRuntimeOffsets(DebuggerIPCControlBlock * pDebugge g_pEEInterface->GetRuntimeOffsets(&pDebuggerRuntimeOffsets->m_TLSIndex, 
&pDebuggerRuntimeOffsets->m_TLSIsSpecialIndex, &pDebuggerRuntimeOffsets->m_TLSCantStopIndex, - &pDebuggerRuntimeOffsets->m_TLSIndexOfPredefs, &pDebuggerRuntimeOffsets->m_EEThreadStateOffset, &pDebuggerRuntimeOffsets->m_EEThreadStateNCOffset, &pDebuggerRuntimeOffsets->m_EEThreadPGCDisabledOffset, @@ -778,10 +777,6 @@ HRESULT DebuggerRCThread::SetupRuntimeOffsets(DebuggerIPCControlBlock * pDebugge &pDebuggerRuntimeOffsets->m_EEFrameNextOffset, &pDebuggerRuntimeOffsets->m_EEIsManagedExceptionStateMask); -#ifndef FEATURE_IMPLICIT_TLS - _ASSERTE((pDebuggerRuntimeOffsets->m_TLSIndexOfPredefs != 0) || !"CExecutionEngine::TlsIndex is not initialized yet"); -#endif - // Remember the struct in the control block. pDebuggerIPCControlBlock->m_pRuntimeOffsets = pDebuggerRuntimeOffsets; diff --git a/src/debug/inc/dbgipcevents.h b/src/debug/inc/dbgipcevents.h index dc900660c315..8a6786a37889 100644 --- a/src/debug/inc/dbgipcevents.h +++ b/src/debug/inc/dbgipcevents.h @@ -133,7 +133,6 @@ struct MSLAYOUT DebuggerIPCRuntimeOffsets SIZE_T m_TLSIndex; // The TLS index the CLR is using to hold Thread objects SIZE_T m_TLSIsSpecialIndex; // The index into the Predef block of the the "IsSpecial" status for a thread. SIZE_T m_TLSCantStopIndex; // The index into the Predef block of the the Can't-Stop count. - SIZE_T m_TLSIndexOfPredefs; // The TLS index of the Predef block. SIZE_T m_EEThreadStateOffset; // Offset of m_state in a Thread SIZE_T m_EEThreadStateNCOffset; // Offset of m_stateNC in a Thread SIZE_T m_EEThreadPGCDisabledOffset; // Offset of the bit for whether PGC is disabled or not in a Thread diff --git a/src/inc/clrconfigvalues.h b/src/inc/clrconfigvalues.h index 7b096b438f7b..4c48a20c8649 100644 --- a/src/inc/clrconfigvalues.h +++ b/src/inc/clrconfigvalues.h @@ -284,7 +284,6 @@ CONFIG_DWORD_INFO_DIRECT_ACCESS(INTERNAL_ConditionalContracts, W("ConditionalCon CONFIG_DWORD_INFO(INTERNAL_ConsistencyCheck, W("ConsistencyCheck"), 0, "") CONFIG_DWORD_INFO_EX(INTERNAL_ContinueOnAssert, W("ContinueOnAssert"), 0, "If set, doesn't break on asserts.", CLRConfig::REGUTIL_default) RETAIL_CONFIG_DWORD_INFO_EX(UNSUPPORTED_disableStackOverflowProbing, W("disableStackOverflowProbing"), 0, "", CLRConfig::FavorConfigFile) -CONFIG_DWORD_INFO(INTERNAL_EnforceEEThreadNotRequiredContracts, W("EnforceEEThreadNotRequiredContracts"), 0, "Indicates whether to enforce EE_THREAD_NOT_REQUIRED contracts (not enforced by default for perf reasons). 
Only applicable in dbg/chk builds--EE_THREAD_NOT_REQUIRED contracts never enforced in ret builds.") CONFIG_DWORD_INFO_DIRECT_ACCESS(INTERNAL_InjectFatalError, W("InjectFatalError"), "") CONFIG_DWORD_INFO_EX(INTERNAL_InjectFault, W("InjectFault"), 0, "", CLRConfig::REGUTIL_default) CONFIG_DWORD_INFO_DIRECT_ACCESS(INTERNAL_SuppressChecks, W("SuppressChecks"), "") diff --git a/src/inc/dacvars.h b/src/inc/dacvars.h index c5eb2cf99655..f0f156dc62dd 100644 --- a/src/inc/dacvars.h +++ b/src/inc/dacvars.h @@ -127,7 +127,7 @@ DEFINE_DACVAR(ULONG, BOOL, SystemDomain__s_fForceInstrument, SystemDomain::s_fFo DEFINE_DACVAR(ULONG, PTR_SharedDomain, SharedDomain__m_pSharedDomain, SharedDomain::m_pSharedDomain) -DEFINE_DACVAR(ULONG, DWORD, CExecutionEngine__TlsIndex, CExecutionEngine::TlsIndex) +DEFINE_DACVAR(ULONG, DWORD, dac__g_TlsIndex, g_TlsIndex) #if defined(FEATURE_WINDOWSPHONE) DEFINE_DACVAR(ULONG, int, CCLRErrorReportingManager__g_ECustomDumpFlavor, CCLRErrorReportingManager::g_ECustomDumpFlavor) @@ -150,11 +150,6 @@ DEFINE_DACVAR(ULONG, PTR_JITNotification, dac__g_pNotificationTable, ::g_pNotifi DEFINE_DACVAR(ULONG, ULONG32, dac__g_dacNotificationFlags, ::g_dacNotificationFlags) DEFINE_DACVAR(ULONG, PTR_GcNotification, dac__g_pGcNotificationTable, ::g_pGcNotificationTable) -#ifndef FEATURE_IMPLICIT_TLS -DEFINE_DACVAR(ULONG, DWORD, dac__gThreadTLSIndex, ::gThreadTLSIndex) -DEFINE_DACVAR(ULONG, DWORD, dac__gAppDomainTLSIndex, ::gAppDomainTLSIndex) -#endif - DEFINE_DACVAR(ULONG, PTR_EEConfig, dac__g_pConfig, ::g_pConfig) DEFINE_DACVAR(ULONG, MscorlibBinder, dac__g_Mscorlib, ::g_Mscorlib) diff --git a/src/inc/switches.h b/src/inc/switches.h index fae746d853bd..a9accbb2c6b2 100644 --- a/src/inc/switches.h +++ b/src/inc/switches.h @@ -64,13 +64,6 @@ #define GC_STATS #endif - -#if defined(_DEBUG) && !defined(DACCESS_COMPILE) && (defined(_TARGET_X86_) || defined(_TARGET_AMD64_)) -// On x86/x64 Windows debug builds, respect the COMPlus_EnforceEEThreadNotRequiredContracts -// runtime switch. See code:InitThreadManager and code:GetThreadGenericFullCheck -#define ENABLE_GET_THREAD_GENERIC_FULL_CHECK -#endif - #if defined(_TARGET_X86_) || defined(_TARGET_ARM_) #define USE_UPPER_ADDRESS 0 diff --git a/src/inc/tls.h b/src/inc/tls.h index 55f74892bb5f..3e8c9a770d75 100644 --- a/src/inc/tls.h +++ b/src/inc/tls.h @@ -13,20 +13,8 @@ #ifndef __tls_h__ #define __tls_h__ -#ifdef FEATURE_IMPLICIT_TLS -#ifdef _WIN64 -#ifndef _DEBUG -#define OFFSETOF__TLS__tls_ThreadLocalInfo 0x10 -#else // _DEBUG -#define OFFSETOF__TLS__tls_ThreadLocalInfo 0x08 -#endif // _DEBUG -#else // _WIN64 -#define OFFSETOF__TLS__tls_ThreadLocalInfo 0x04 -#endif // _WIN64 - -#define OFFSETOF__TLS__tls_CurrentThread (OFFSETOF__TLS__tls_ThreadLocalInfo+0x0) -#define OFFSETOF__TLS__tls_EETlsData (OFFSETOF__TLS__tls_CurrentThread+2*sizeof(void*)) - +#define OFFSETOF__TLS__tls_CurrentThread (0x0) +#define OFFSETOF__TLS__tls_EETlsData (2*sizeof(void*)) #ifdef _TARGET_WIN64_ #define WINNT_OFFSETOF__TEB__ThreadLocalStoragePointer 0x58 @@ -34,45 +22,4 @@ #define WINNT_OFFSETOF__TEB__ThreadLocalStoragePointer 0x2c #endif -#endif // FEATURE_IMPLICIT_TLS - -// Pointer to a function that retrieves the TLS data for a specific index. -typedef LPVOID (*POPTIMIZEDTLSGETTER)(); - -//--------------------------------------------------------------------------- -// Creates a platform-optimized version of TlsGetValue compiled -// for a particular index. Can return NULL - the caller should substitute -// a non-optimized getter in this case. 
-//--------------------------------------------------------------------------- -POPTIMIZEDTLSGETTER MakeOptimizedTlsGetter(DWORD tlsIndex, LPVOID pBuffer = NULL, SIZE_T cbBuffer = 0, POPTIMIZEDTLSGETTER pGenericImpl = NULL, BOOL fForceGeneric = FALSE); - - -//--------------------------------------------------------------------------- -// Frees a function created by MakeOptimizedTlsGetter(). -//--------------------------------------------------------------------------- -VOID FreeOptimizedTlsGetter(POPTIMIZEDTLSGETTER pOptimizedTlsGetter); - - - -//--------------------------------------------------------------------------- -// For ASM stub generators that want to inline Thread access for efficiency, -// the Thread manager uses these constants to define how to access the Thread. -//--------------------------------------------------------------------------- -enum TLSACCESSMODE { - TLSACCESS_GENERIC = 1, // Make no platform assumptions: use the API - // TLS - TLSACCESS_WNT = 2, // WinNT-style TLS - TLSACCESS_WNT_HIGH = 3, // WinNT5-style TLS, slot > TLS_MINIMUM_AVAILABLE -}; - - -//--------------------------------------------------------------------------- -// WinNT store the TLS in different places relative to the -// fs:[0]. This api reveals which. Can also return TLSACCESS_GENERIC if -// no info is available about the Thread location (you have to use the TlsGetValue -// api.) This is intended for use by stub generators that want to inline TLS -// access. -//--------------------------------------------------------------------------- -TLSACCESSMODE GetTLSAccessMode(DWORD tlsIndex); - #endif // __tls_h__ diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp index 1f91ef692c1e..0cfc5656798e 100644 --- a/src/jit/ee_il_dll.cpp +++ b/src/jit/ee_il_dll.cpp @@ -205,9 +205,6 @@ ICorJitCompiler* __stdcall getJit() // Information kept in thread-local storage. This is used in the noway_assert exceptional path. // If you are using it more broadly in retail code, you would need to understand the // performance implications of accessing TLS. -// -// If the JIT is being statically linked, these methods must be implemented by the consumer. 
-#if !defined(FEATURE_MERGE_JIT_AND_ENGINE) || !defined(FEATURE_IMPLICIT_TLS) __declspec(thread) void* gJitTls = nullptr; @@ -221,15 +218,6 @@ void SetJitTls(void* value) gJitTls = value; } -#else // !defined(FEATURE_MERGE_JIT_AND_ENGINE) || !defined(FEATURE_IMPLICIT_TLS) - -extern "C" { -void* GetJitTls(); -void SetJitTls(void* value); -} - -#endif // // defined(FEATURE_MERGE_JIT_AND_ENGINE) && defined(FEATURE_IMPLICIT_TLS) - #if defined(DEBUG) JitTls::JitTls(ICorJitInfo* jitInfo) : m_compiler(nullptr), m_logEnv(jitInfo) diff --git a/src/utilcode/CMakeLists.txt b/src/utilcode/CMakeLists.txt index dfe830d5c09f..9c61314848b9 100644 --- a/src/utilcode/CMakeLists.txt +++ b/src/utilcode/CMakeLists.txt @@ -81,11 +81,6 @@ if(WIN32) ) endif() - if(NOT DEFINED FEATURE_IMPLICIT_TLS) - list(APPEND UTILCODE_SOURCES - tls.cpp - ) - endif(NOT DEFINED FEATURE_IMPLICIT_TLS) endif(WIN32) set(UTILCODE_SOURCES @@ -93,14 +88,6 @@ set(UTILCODE_SOURCES perflog.cpp ) -if(WIN32) - if(NOT DEFINED FEATURE_IMPLICIT_TLS) - list(APPEND UTILCODE_SOURCES - tls.cpp - ) - endif(NOT DEFINED FEATURE_IMPLICIT_TLS) -endif() - set(UTILCODE_DAC_SOURCES ${UTILCODE_COMMON_SOURCES} hostimpl.cpp diff --git a/src/utilcode/tls.cpp b/src/utilcode/tls.cpp deleted file mode 100644 index cb6934fd0b43..000000000000 --- a/src/utilcode/tls.cpp +++ /dev/null @@ -1,271 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. -/* TLS.CPP: - * - - * - * Encapsulates TLS access for maximum performance. - * - */ - -#include "stdafx.h" - -#include "unsafe.h" -#include "tls.h" -#include "contract.h" -#include "corerror.h" -#include "ex.h" -#include "clrhost.h" - -#ifndef SELF_NO_HOST -#include "clrconfig.h" -#endif - -#include "clrnt.h" - -#ifndef SELF_NO_HOST - -//--------------------------------------------------------------------------- -// Win95 and WinNT store the TLS in different places relative to the -// fs:[0]. This api reveals which. Can also return TLSACCESS_GENERIC if -// no info is available about the Thread location (you have to use the TlsGetValue -// api.) This is intended for use by stub generators that want to inline TLS -// access. -//--------------------------------------------------------------------------- -TLSACCESSMODE GetTLSAccessMode(DWORD tlsIndex) -{ - // Static contracts because this is used by contract infrastructure - STATIC_CONTRACT_NOTHROW; - STATIC_CONTRACT_GC_NOTRIGGER; - - TLSACCESSMODE tlsAccessMode = TLSACCESS_GENERIC; - -#ifdef _DEBUG - // Debug builds allow user to throw a switch to force use of the generic - // (non-optimized) Thread/AppDomain getters. Even if the user doesn't throw - // the switch, force tests to go down the generic getter code path about 1% of the - // time so it's exercised a couple dozen times during each devbvt run. - if ((CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_UseGenericTlsGetters) != 0) || DbgRandomOnExe(.01)) - return TLSACCESS_GENERIC; -#endif - - if (tlsIndex < TLS_MINIMUM_AVAILABLE) - { - tlsAccessMode = TLSACCESS_WNT; - } - else - if (tlsIndex < (TLS_MINIMUM_AVAILABLE + TLS_EXPANSION_SLOTS)) - { - // Expansion slots are lazily created at the first call to - // TlsGetValue on a thread, and the code we generate - // assumes that the expansion slots will exist. 
- // - // On newer flavors of NT we could use the vectored - // exception handler to take the AV, call TlsGetValue, and - // resume execution at the start of the getter. - tlsAccessMode = TLSACCESS_GENERIC;//TLSACCESS_WNT_HIGH; - } - else - { - // - // If the app verifier is enabled, TLS indices - // are faked to help detect invalid handle use. - // - } - - return tlsAccessMode; -} - -//--------------------------------------------------------------------------- -// Creates a platform-optimized version of TlsGetValue compiled -// for a particular index. Can return NULL. -//--------------------------------------------------------------------------- -// A target for the optimized getter can be passed in, this is -// useful so that code can avoid an indirect call for the GetThread -// and GetAppDomain calls for instance. If NULL is passed then -// we will allocate from the executeable heap. -POPTIMIZEDTLSGETTER MakeOptimizedTlsGetter(DWORD tlsIndex, LPVOID pBuffer, SIZE_T cbBuffer, POPTIMIZEDTLSGETTER pGenericImpl, BOOL fForceGeneric) -{ - // Static contracts because this is used by contract infrastructure - STATIC_CONTRACT_NOTHROW; - STATIC_CONTRACT_GC_NOTRIGGER; - - ARM_ONLY(pBuffer = ThumbCodeToDataPointer(pBuffer)); - - // Buffer that should be big enough to encode the TLS getter on any reasonable platform - TADDR patch[4 INDEBUG(+4 /* last error trashing */)]; - - PBYTE pPatch = (PBYTE)&patch; - - TLSACCESSMODE mode = fForceGeneric ? TLSACCESS_GENERIC : GetTLSAccessMode(tlsIndex); - -#if defined(_DEBUG) - if (mode != TLSACCESS_GENERIC) - { - // - // Trash last error in debug builds - // - -#ifdef _TARGET_X86_ - *((DWORD*) (pPatch + 0)) = 0x05c764; // mov dword ptr fs:[offsetof(TEB, LastErrorValue)], LAST_ERROR_TRASH_VALUE - *((DWORD*) (pPatch + 3)) = offsetof(TEB, LastErrorValue); - *((DWORD*) (pPatch + 7)) = LAST_ERROR_TRASH_VALUE; - pPatch += 11; -#endif // _TARGET_X86_ - -#ifdef _TARGET_AMD64_ - // iDNA doesn't like writing directly to gs:[nn] - *((UINT64*)(pPatch + 0)) = 0x25048b4865; // mov rax, gs:[offsetof(TEB, NtTib.Self)] - *((DWORD*) (pPatch + 5)) = offsetof(TEB, NtTib.Self); - *((WORD*) (pPatch + 9)) = 0x80c7; // mov dword ptr [rax + offsetof(TEB, LastErrorValue)], LAST_ERROR_TRASH_VALUE - *((DWORD*) (pPatch + 11)) = offsetof(TEB, LastErrorValue); - *((DWORD*) (pPatch + 15)) = LAST_ERROR_TRASH_VALUE; - pPatch += 19; -#endif - } -#endif // _DEBUG - - switch (mode) - { -#ifdef _TARGET_X86_ - case TLSACCESS_WNT: - *((WORD*) (pPatch + 0)) = 0xa164; // mov eax, fs:[IMM32] - *((DWORD*) (pPatch + 2)) = offsetof(TEB, TlsSlots) + tlsIndex * sizeof(void*); - *((BYTE*) (pPatch + 6)) = 0xc3; // retn - pPatch += 7; - break; - - case TLSACCESS_GENERIC: - if (pGenericImpl == NULL) - return NULL; - - _ASSERTE(pBuffer != NULL); - *((BYTE*) (pPatch + 0)) = 0xE9; // jmp pGenericImpl - TADDR rel32 = ((TADDR)pGenericImpl - ((TADDR)pBuffer + 1 + sizeof(INT32))); - *((INT32*) (pPatch + 1)) = (INT32)rel32; - pPatch += 5; - break; -#endif // _TARGET_X86_ - -#ifdef _TARGET_AMD64_ - case TLSACCESS_WNT: - *((UINT64*)(pPatch + 0)) = 0x25048b4865; // mov rax, gs:[IMM32] - *((DWORD*) (pPatch + 5)) = offsetof(TEB, TlsSlots) + (tlsIndex * sizeof(void*)); - *((BYTE*) (pPatch + 9)) = 0xc3; // return - pPatch += 10; - break; - - case TLSACCESS_GENERIC: - if (pGenericImpl == NULL) - return NULL; - - _ASSERTE(pBuffer != NULL); - *((BYTE*) (pPatch + 0)) = 0xE9; // jmp pGenericImpl - TADDR rel32 = ((TADDR)pGenericImpl - ((TADDR)pBuffer + 1 + sizeof(INT32))); - _ASSERTE((INT64)(INT32)rel32 == (INT64)rel32); - 
*((INT32*) (pPatch + 1)) = (INT32)rel32; - pPatch += 5; - - *pPatch++ = 0xCC; // Make sure there is full 8 bytes worth of data - *pPatch++ = 0xCC; - *pPatch++ = 0xCC; - break; - -#endif // _TARGET_AMD64_ - -#ifdef _TARGET_ARM_ - case TLSACCESS_WNT: - { - WORD slotOffset = (WORD)(offsetof(TEB, TlsSlots) + tlsIndex * sizeof(void*)); - _ASSERTE(slotOffset < 4096); - - WORD *pInstr = (WORD*)pPatch; - - *pInstr++ = 0xee1d; // mrc p15, 0, r0, c13, c0, 2 - *pInstr++ = 0x0f50; - *pInstr++ = 0xf8d0; // ldr r0, [r0, #slotOffset] - *pInstr++ = slotOffset; - *pInstr++ = 0x4770; // bx lr - - pPatch = (PBYTE)pInstr; - } - break; - - case TLSACCESS_GENERIC: - { - if (pGenericImpl == NULL) - return NULL; - - _ASSERTE(pBuffer != NULL); - - *(DWORD *)pPatch = 0x9000F000; // b pGenericImpl - PutThumb2BlRel24((WORD*)pPatch, (TADDR)pGenericImpl - ((TADDR)pBuffer + 4 + THUMB_CODE)); - - pPatch += 4; - } - break; -#endif // _TARGET_ARM_ - } - - SIZE_T cbCode = (TADDR)pPatch - (TADDR)&patch; - _ASSERTE(cbCode <= sizeof(patch)); - - if (pBuffer != NULL) - { - _ASSERTE_ALL_BUILDS("clr/src/utilcode/tls.cpp", cbCode <= cbBuffer); - - // We assume that the first instruction of the buffer is a short jump to dummy helper - // that can be atomically overwritten to avoid races with other threads executing the code. - // It is the same basic technique as hot patching. - - // Assert on all builds to make sure that retail optimizations are not affecting the alignment. - _ASSERTE_ALL_BUILDS("clr/src/utilcode/tls.cpp", IS_ALIGNED((void*)pBuffer, sizeof(TADDR))); - - // Size of short jump that gets patched last. - if (cbCode > sizeof(TADDR)) - { - memcpy((BYTE *)pBuffer + sizeof(TADDR), &patch[1], cbCode - sizeof(TADDR)); - FlushInstructionCache(GetCurrentProcess(), (BYTE *)pBuffer + sizeof(TADDR), cbCode - sizeof(TADDR)); - } - - // Make sure that the the dummy implementation still works. - _ASSERTE(((POPTIMIZEDTLSGETTER)ARM_ONLY(DataPointerToThumbCode)(pBuffer))() == NULL); - - // It is important for this write to happen atomically - VolatileStore((TADDR *)pBuffer, patch[0]); - - FlushInstructionCache(GetCurrentProcess(), (BYTE *)pBuffer, sizeof(TADDR)); - } - else - { - pBuffer = (BYTE*) new (executable, nothrow) BYTE[cbCode]; - if (pBuffer == NULL) - return NULL; - - memcpy(pBuffer, &patch, cbCode); - - FlushInstructionCache(GetCurrentProcess(), pBuffer, cbCode); - } - - return (POPTIMIZEDTLSGETTER)ARM_ONLY(DataPointerToThumbCode)(pBuffer); -} - - -//--------------------------------------------------------------------------- -// Frees a function created by MakeOptimizedTlsGetter(). 
-//--------------------------------------------------------------------------- -VOID FreeOptimizedTlsGetter(POPTIMIZEDTLSGETTER pOptimizedTlsGetter) -{ - // Static contracts because this is used by contract infrastructure - STATIC_CONTRACT_NOTHROW; - STATIC_CONTRACT_GC_NOTRIGGER; - - BYTE* pGetter = (BYTE*)pOptimizedTlsGetter; -#ifdef _TARGET_ARM_ - pGetter = ThumbCodeToDataPointer(pGetter); -#endif - DeleteExecutable(pGetter); -} - -#endif // !SELF_NO_HOST diff --git a/src/vm/CMakeLists.txt b/src/vm/CMakeLists.txt index f8790cf85d49..0aed676d9416 100644 --- a/src/vm/CMakeLists.txt +++ b/src/vm/CMakeLists.txt @@ -351,7 +351,6 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/InstantiatingStub.asm ${ARCH_SOURCES_DIR}/JitHelpers_Fast.asm ${ARCH_SOURCES_DIR}/JitHelpers_FastWriteBarriers.asm - ${ARCH_SOURCES_DIR}/JitHelpers_InlineGetAppDomain.asm ${ARCH_SOURCES_DIR}/JitHelpers_InlineGetThread.asm ${ARCH_SOURCES_DIR}/JitHelpers_SingleAppDomain.asm ${ARCH_SOURCES_DIR}/JitHelpers_Slow.asm @@ -359,7 +358,6 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64) ${ARCH_SOURCES_DIR}/RedirectedHandledJITCase.asm ${ARCH_SOURCES_DIR}/ThePreStubAMD64.asm ${ARCH_SOURCES_DIR}/ExternalMethodFixupThunk.asm - ${ARCH_SOURCES_DIR}/TlsGetters.asm # Condition="'$(FeatureImplicitTls)' != 'true' ${ARCH_SOURCES_DIR}/UMThunkStub.asm ${ARCH_SOURCES_DIR}/VirtualCallStubAMD64.asm ) diff --git a/src/vm/amd64/AsmMacros.inc b/src/vm/amd64/AsmMacros.inc index f95a29192931..58fd8d130ff4 100644 --- a/src/vm/amd64/AsmMacros.inc +++ b/src/vm/amd64/AsmMacros.inc @@ -188,29 +188,25 @@ Section ends ; -; Macro to Call GetThread() correctly whether it is indirect or direct -; -CALL_GETTHREAD macro -ifndef GetThread -extern GetThread:proc -endif - call GetThread - endm +; Inlined version of GetThread +; Trashes rax and r11 +; +INLINE_GETTHREAD macro Reg + + EXTERN _tls_index : DWORD + EXTERN gCurrentThreadInfo:DWORD + + mov r11d, [_tls_index] + mov rax, gs:[OFFSET__TEB__ThreadLocalStoragePointer] + mov rax, [rax + r11 * 8] + mov r11d, SECTIONREL gCurrentThreadInfo + mov Reg, [rax + r11] -CALL_GETAPPDOMAIN macro -ifndef GetAppDomain -extern GetAppDomain:proc -endif - call GetAppDomain endm -; ; if you change this code there will be corresponding code in JITInterfaceGen.cpp which will need to be changed ; -; DEFAULT_TARGET needs to always be futher away than the fixed up target will be - - JIT_HELPER_MONITOR_THUNK macro THUNK_NAME, Section Section segment para 'CODE' align 16 diff --git a/src/vm/amd64/InstantiatingStub.asm b/src/vm/amd64/InstantiatingStub.asm index dff1b6f5a657..8601e4ae4435 100644 --- a/src/vm/amd64/InstantiatingStub.asm +++ b/src/vm/amd64/InstantiatingStub.asm @@ -90,13 +90,11 @@ NESTED_ENTRY InstantiatingMethodStubWorker, _TEXT ; ; link the StubHelperFrame ; - CALL_GETTHREAD - mov rdx, [rax + OFFSETOF__Thread__m_pFrame] + INLINE_GETTHREAD r12 + mov rdx, [r12 + OFFSETOF__Thread__m_pFrame] mov [rbp + OFFSETOF_FRAME + OFFSETOF__Frame__m_Next], rdx lea rcx, [rbp + OFFSETOF_FRAME] - mov [rax + OFFSETOF__Thread__m_pFrame], rcx - - mov r12, rax ; store the Thread pointer + mov [r12 + OFFSETOF__Thread__m_pFrame], rcx add rsp, SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES diff --git a/src/vm/amd64/JitHelpers_Fast.asm b/src/vm/amd64/JitHelpers_Fast.asm index f004be549eea..5e0d79f74f9f 100644 --- a/src/vm/amd64/JitHelpers_Fast.asm +++ b/src/vm/amd64/JitHelpers_Fast.asm @@ -583,58 +583,6 @@ endif nop LEAF_END_MARKED JIT_WriteBarrier, _TEXT -ifndef FEATURE_IMPLICIT_TLS -LEAF_ENTRY GetThread, _TEXT - ; the default implementation will just jump to one that 
returns null until - ; MakeOptimizedTlsGetter is run which will overwrite this with the actual - ; implementation. - jmp short GetTLSDummy - - ; - ; insert enough NOPS to be able to insert the largest optimized TLS getter - ; that we might need, it is important that the TLS getter doesn't overwrite - ; into the dummy getter. - ; - db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh) - -LEAF_END GetThread, _TEXT - -LEAF_ENTRY GetAppDomain, _TEXT - ; the default implementation will just jump to one that returns null until - ; MakeOptimizedTlsGetter is run which will overwrite this with the actual - ; implementation. - jmp short GetTLSDummy - - ; - ; insert enough NOPS to be able to insert the largest optimized TLS getter - ; that we might need, it is important that the TLS getter doesn't overwrite - ; into the dummy getter. - ; - db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh) - -LEAF_END GetAppDomain, _TEXT - -LEAF_ENTRY GetTLSDummy, _TEXT - xor rax, rax - ret -LEAF_END GetTLSDummy, _TEXT - -LEAF_ENTRY ClrFlsGetBlock, _TEXT - ; the default implementation will just jump to one that returns null until - ; MakeOptimizedTlsGetter is run which will overwrite this with the actual - ; implementation. - jmp short GetTLSDummy - - ; - ; insert enough NOPS to be able to insert the largest optimized TLS getter - ; that we might need, it is important that the TLS getter doesn't overwrite - ; into the dummy getter. - ; - db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh) - -LEAF_END ClrFlsGetBlock, _TEXT -endif - ; Mark start of the code region that we patch at runtime LEAF_ENTRY JIT_PatchedCodeLast, _TEXT ret @@ -986,12 +934,11 @@ if 0 ne 0 ; ; link the TailCallFrame ; - CALL_GETTHREAD - mov r14, rax - mov r15, [rax + OFFSETOF__Thread__m_pFrame] + INLINE_GETTHREAD r14 + mov r15, [r14 + OFFSETOF__Thread__m_pFrame] mov [r13 + OFFSETOF_FRAME + OFFSETOF__Frame__m_Next], r15 lea r10, [r13 + OFFSETOF_FRAME] - mov [rax + OFFSETOF__Thread__m_pFrame], r10 + mov [r14 + OFFSETOF__Thread__m_pFrame], r10 endif ; the pretend call would be here diff --git a/src/vm/amd64/JitHelpers_InlineGetAppDomain.asm b/src/vm/amd64/JitHelpers_InlineGetAppDomain.asm deleted file mode 100644 index 187decf14d2c..000000000000 --- a/src/vm/amd64/JitHelpers_InlineGetAppDomain.asm +++ /dev/null @@ -1,123 +0,0 @@ -; Licensed to the .NET Foundation under one or more agreements. -; The .NET Foundation licenses this file to you under the MIT license. -; See the LICENSE file in the project root for more information. - -; ==++== -; - -; -; ==--== -; *********************************************************************** -; File: JitHelpers_InlineGetAppDomain.asm, see history in jithelp.asm -; -; Notes: These routinues will be patched at runtime with the location in -; the TLS to find the AppDomain* and are the fastest implementation -; of their specific functionality. -; *********************************************************************** - -include AsmMacros.inc -include asmconstants.inc - -; Min amount of stack space that a nested function should allocate. -MIN_SIZE equ 28h - -; Macro to create a patchable inline GetAppdomain, if we decide to create patchable -; high TLS inline versions then just change this macro to make sure to create enough -; space in the asm to patch the high TLS getter instructions. 
-PATCHABLE_INLINE_GETAPPDOMAIN macro Reg, PatchLabel -PATCH_LABEL PatchLabel - mov Reg, gs:[OFFSET__TEB__TlsSlots] - endm - -extern JIT_GetSharedNonGCStaticBase_Helper:proc -extern JIT_GetSharedGCStaticBase_Helper:proc - -LEAF_ENTRY JIT_GetSharedNonGCStaticBase_InlineGetAppDomain, _TEXT - ; Check if rcx (moduleDomainID) is not a moduleID - mov rax, rcx - test rax, 1 - jz HaveLocalModule - - PATCHABLE_INLINE_GETAPPDOMAIN rax, JIT_GetSharedNonGCStaticBase__PatchTLSLabel - - ; Get the LocalModule, rcx will always be odd, so: rcx * 4 - 4 <=> (rcx >> 1) * 8 - mov rax, [rax + OFFSETOF__AppDomain__m_sDomainLocalBlock + OFFSETOF__DomainLocalBlock__m_pModuleSlots] - mov rax, [rax + rcx * 4 - 4] - - HaveLocalModule: - ; If class is not initialized, bail to C++ helper - test byte ptr [rax + OFFSETOF__DomainLocalModule__m_pDataBlob + rdx], 1 - jz CallHelper - REPRET - - align 16 - CallHelper: - ; Tail call JIT_GetSharedNonGCStaticBase_Helper - mov rcx, rax - jmp JIT_GetSharedNonGCStaticBase_Helper -LEAF_END JIT_GetSharedNonGCStaticBase_InlineGetAppDomain, _TEXT - -LEAF_ENTRY JIT_GetSharedNonGCStaticBaseNoCtor_InlineGetAppDomain, _TEXT - ; Check if rcx (moduleDomainID) is not a moduleID - mov rax, rcx - test rax, 1 - jz HaveLocalModule - - PATCHABLE_INLINE_GETAPPDOMAIN rax, JIT_GetSharedNonGCStaticBaseNoCtor__PatchTLSLabel - - ; Get the LocalModule, rcx will always be odd, so: rcx * 4 - 4 <=> (rcx >> 1) * 8 - mov rax, [rax + OFFSETOF__AppDomain__m_sDomainLocalBlock + OFFSETOF__DomainLocalBlock__m_pModuleSlots] - mov rax, [rax + rcx * 4 - 4] - ret - - align 16 - HaveLocalModule: - REPRET -LEAF_END JIT_GetSharedNonGCStaticBaseNoCtor_InlineGetAppDomain, _TEXT - -LEAF_ENTRY JIT_GetSharedGCStaticBase_InlineGetAppDomain, _TEXT - ; Check if rcx (moduleDomainID) is not a moduleID - mov rax, rcx - test rax, 1 - jz HaveLocalModule - - PATCHABLE_INLINE_GETAPPDOMAIN rax, JIT_GetSharedGCStaticBase__PatchTLSLabel - - ; Get the LocalModule, rcx will always be odd, so: rcx * 4 - 4 <=> (rcx >> 1) * 8 - mov rax, [rax + OFFSETOF__AppDomain__m_sDomainLocalBlock + OFFSETOF__DomainLocalBlock__m_pModuleSlots] - mov rax, [rax + rcx * 4 - 4] - - HaveLocalModule: - ; If class is not initialized, bail to C++ helper - test byte ptr [rax + OFFSETOF__DomainLocalModule__m_pDataBlob + rdx], 1 - jz CallHelper - - mov rax, [rax + OFFSETOF__DomainLocalModule__m_pGCStatics] - ret - - align 16 - CallHelper: - ; Tail call Jit_GetSharedGCStaticBase_Helper - mov rcx, rax - jmp JIT_GetSharedGCStaticBase_Helper -LEAF_END JIT_GetSharedGCStaticBase_InlineGetAppDomain, _TEXT - -LEAF_ENTRY JIT_GetSharedGCStaticBaseNoCtor_InlineGetAppDomain, _TEXT - ; Check if rcx (moduleDomainID) is not a moduleID - mov rax, rcx - test rax, 1 - jz HaveLocalModule - - PATCHABLE_INLINE_GETAPPDOMAIN rax, JIT_GetSharedGCStaticBaseNoCtor__PatchTLSLabel - - ; Get the LocalModule, rcx will always be odd, so: rcx * 4 - 4 <=> (rcx >> 1) * 8 - mov rax, [rax + OFFSETOF__AppDomain__m_sDomainLocalBlock + OFFSETOF__DomainLocalBlock__m_pModuleSlots] - mov rax, [rax + rcx * 4 - 4] - - HaveLocalModule: - mov rax, [rax + OFFSETOF__DomainLocalModule__m_pGCStatics] - ret -LEAF_END JIT_GetSharedGCStaticBaseNoCtor_InlineGetAppDomain, _TEXT - - end - diff --git a/src/vm/amd64/JitHelpers_InlineGetThread.asm b/src/vm/amd64/JitHelpers_InlineGetThread.asm index 40d63bf7298a..d9f58cc30fc7 100644 --- a/src/vm/amd64/JitHelpers_InlineGetThread.asm +++ b/src/vm/amd64/JitHelpers_InlineGetThread.asm @@ -2,11 +2,6 @@ ; The .NET Foundation licenses this file to you under the MIT license. 
; See the LICENSE file in the project root for more information. -; ==++== -; - -; -; ==--== ; *********************************************************************** ; File: JitHelpers_InlineGetThread.asm, see history in jithelp.asm ; @@ -21,15 +16,6 @@ include asmconstants.inc ; Min amount of stack space that a nested function should allocate. MIN_SIZE equ 28h -; Macro to create a patchable inline GetAppdomain, if we decide to create patchable -; high TLS inline versions then just change this macro to make sure to create enough -; space in the asm to patch the high TLS getter instructions. -PATCHABLE_INLINE_GETTHREAD macro Reg, PatchLabel -PATCH_LABEL PatchLabel - mov Reg, gs:[OFFSET__TEB__TlsSlots] - endm - - JIT_NEW equ ?JIT_New@@YAPEAVObject@@PEAUCORINFO_CLASS_STRUCT_@@@Z Object__DEBUG_SetAppDomain equ ?DEBUG_SetAppDomain@Object@@QEAAXPEAVAppDomain@@@Z CopyValueClassUnchecked equ ?CopyValueClassUnchecked@@YAXPEAX0PEAVMethodTable@@@Z @@ -49,11 +35,6 @@ extern JIT_NewArr1:proc extern JIT_InternalThrow:proc -ifdef _DEBUG -extern DEBUG_TrialAllocSetAppDomain:proc -extern DEBUG_TrialAllocSetAppDomain_NoScratchArea:proc -endif - ; IN: rcx: MethodTable* ; OUT: rax: new object LEAF_ENTRY JIT_TrialAllocSFastMP_InlineGetThread, _TEXT @@ -61,7 +42,7 @@ LEAF_ENTRY JIT_TrialAllocSFastMP_InlineGetThread, _TEXT ; m_BaseSize is guaranteed to be a multiple of 8. - PATCHABLE_INLINE_GETTHREAD r11, JIT_TrialAllocSFastMP_InlineGetThread__PatchTLSOffset + INLINE_GETTHREAD r11 mov r10, [r11 + OFFSET__Thread__m_alloc_context__alloc_limit] mov rax, [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr] @@ -73,10 +54,6 @@ LEAF_ENTRY JIT_TrialAllocSFastMP_InlineGetThread, _TEXT mov [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr], rdx mov [rax], rcx -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain_NoScratchArea -endif ; _DEBUG - ret AllocFailed: @@ -95,7 +72,7 @@ NESTED_ENTRY JIT_BoxFastMP_InlineGetThread, _TEXT ; m_BaseSize is guaranteed to be a multiple of 8. 
- PATCHABLE_INLINE_GETTHREAD r11, JIT_BoxFastMPIGT__PatchTLSLabel + INLINE_GETTHREAD r11 mov r10, [r11 + OFFSET__Thread__m_alloc_context__alloc_limit] mov rax, [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr] @@ -107,10 +84,6 @@ NESTED_ENTRY JIT_BoxFastMP_InlineGetThread, _TEXT mov [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr], r8 mov [rax], rcx -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain_NoScratchArea -endif ; _DEBUG - ; Check whether the object contains pointers test dword ptr [rcx + OFFSETOF__MethodTable__m_dwFlags], MethodTable__enum_flag_ContainsPointers jnz ContainsPointers @@ -169,7 +142,7 @@ LEAF_ENTRY AllocateStringFastMP_InlineGetThread, _TEXT lea edx, [edx + ecx*2 + 7] and edx, -8 - PATCHABLE_INLINE_GETTHREAD r11, AllocateStringFastMP_InlineGetThread__PatchTLSOffset + INLINE_GETTHREAD r11 mov r10, [r11 + OFFSET__Thread__m_alloc_context__alloc_limit] mov rax, [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr] @@ -183,10 +156,6 @@ LEAF_ENTRY AllocateStringFastMP_InlineGetThread, _TEXT mov [rax + OFFSETOF__StringObject__m_StringLength], ecx -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain_NoScratchArea -endif ; _DEBUG - ret OversizedString: @@ -226,7 +195,7 @@ LEAF_ENTRY JIT_NewArr1VC_MP_InlineGetThread, _TEXT and r8d, -8 - PATCHABLE_INLINE_GETTHREAD r11, JIT_NewArr1VC_MP_InlineGetThread__PatchTLSOffset + INLINE_GETTHREAD r11 mov r10, [r11 + OFFSET__Thread__m_alloc_context__alloc_limit] mov rax, [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr] @@ -241,10 +210,6 @@ LEAF_ENTRY JIT_NewArr1VC_MP_InlineGetThread, _TEXT mov dword ptr [rax + OFFSETOF__ArrayBase__m_NumComponents], edx -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain_NoScratchArea -endif ; _DEBUG - ret OversizedArray: @@ -279,7 +244,7 @@ LEAF_ENTRY JIT_NewArr1OBJ_MP_InlineGetThread, _TEXT ; No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed ; to be a multiple of 8. - PATCHABLE_INLINE_GETTHREAD r11, JIT_NewArr1OBJ_MP_InlineGetThread__PatchTLSOffset + INLINE_GETTHREAD r11 mov r10, [r11 + OFFSET__Thread__m_alloc_context__alloc_limit] mov rax, [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr] @@ -293,10 +258,6 @@ LEAF_ENTRY JIT_NewArr1OBJ_MP_InlineGetThread, _TEXT mov dword ptr [rax + OFFSETOF__ArrayBase__m_NumComponents], edx -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain_NoScratchArea -endif ; _DEBUG - ret OversizedArray: diff --git a/src/vm/amd64/JitHelpers_Slow.asm b/src/vm/amd64/JitHelpers_Slow.asm index f86d429e33f7..0e26ae6dfd96 100644 --- a/src/vm/amd64/JitHelpers_Slow.asm +++ b/src/vm/amd64/JitHelpers_Slow.asm @@ -42,7 +42,6 @@ EXTERN g_GCShadowEnd:QWORD endif JIT_NEW equ ?JIT_New@@YAPEAVObject@@PEAUCORINFO_CLASS_STRUCT_@@@Z -Object__DEBUG_SetAppDomain equ ?DEBUG_SetAppDomain@Object@@QEAAXPEAVAppDomain@@@Z CopyValueClassUnchecked equ ?CopyValueClassUnchecked@@YAXPEAX0PEAVMethodTable@@@Z JIT_Box equ ?JIT_Box@@YAPEAVObject@@PEAUCORINFO_CLASS_STRUCT_@@PEAX@Z g_pStringClass equ ?g_pStringClass@@3PEAVMethodTable@@EA @@ -162,290 +161,6 @@ endif LEAF_END_MARKED JIT_WriteBarrier_Debug, _TEXT endif -NESTED_ENTRY JIT_TrialAllocSFastMP, _TEXT - alloc_stack MIN_SIZE - END_PROLOGUE - - CALL_GETTHREAD - mov r11, rax - - mov r8d, [rcx + OFFSET__MethodTable__m_BaseSize] - - ; m_BaseSize is guaranteed to be a multiple of 8. 
- - mov r10, [r11 + OFFSET__Thread__m_alloc_context__alloc_limit] - mov rax, [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr] - - add r8, rax - - cmp r8, r10 - ja AllocFailed - - mov [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr], r8 - mov [rax], rcx - -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain -endif ; _DEBUG - - ; epilog - add rsp, MIN_SIZE - ret - - AllocFailed: - add rsp, MIN_SIZE - jmp JIT_NEW -NESTED_END JIT_TrialAllocSFastMP, _TEXT - - -; HCIMPL2(Object*, JIT_Box, CORINFO_CLASS_HANDLE type, void* unboxedData) -NESTED_ENTRY JIT_BoxFastMP, _TEXT - alloc_stack MIN_SIZE - END_PROLOGUE - - mov rax, [rcx + OFFSETOF__MethodTable__m_pWriteableData] - - ; Check whether the class has not been initialized - test dword ptr [rax + OFFSETOF__MethodTableWriteableData__m_dwFlags], MethodTableWriteableData__enum_flag_Unrestored - jnz ClassNotInited - - CALL_GETTHREAD - mov r11, rax - - mov r8d, [rcx + OFFSET__MethodTable__m_BaseSize] - - ; m_BaseSize is guaranteed to be a multiple of 8. - - mov r10, [r11 + OFFSET__Thread__m_alloc_context__alloc_limit] - mov rax, [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr] - - add r8, rax - - cmp r8, r10 - ja AllocFailed - - mov [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr], r8 - mov [rax], rcx - -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain -endif ; _DEBUG - - ; Check whether the object contains pointers - test dword ptr [rcx + OFFSETOF__MethodTable__m_dwFlags], MethodTable__enum_flag_ContainsPointers - jnz ContainsPointers - - ; We have no pointers - emit a simple inline copy loop - - mov ecx, [rcx + OFFSET__MethodTable__m_BaseSize] - sub ecx, 18h ; sizeof(ObjHeader) + sizeof(Object) + last slot - - CopyLoop: - mov r8, [rdx+rcx] - mov [rax+rcx+8], r8 - - sub ecx, 8 - jge CopyLoop - - add rsp, MIN_SIZE - ret - - ContainsPointers: - ; Do call to CopyValueClassUnchecked(object, data, pMT) - - mov [rsp+20h], rax - - mov r8, rcx - lea rcx, [rax + 8] - call CopyValueClassUnchecked - - mov rax, [rsp+20h] - - add rsp, MIN_SIZE - ret - - ClassNotInited: - AllocFailed: - add rsp, MIN_SIZE - jmp JIT_Box -NESTED_END JIT_BoxFastMP, _TEXT - - -NESTED_ENTRY AllocateStringFastMP, _TEXT - alloc_stack MIN_SIZE - END_PROLOGUE - - ; Instead of doing elaborate overflow checks, we just limit the number of elements - ; to (LARGE_OBJECT_SIZE - 256)/sizeof(WCHAR) or less. - ; This will avoid all overflow problems, as well as making sure - ; big string objects are correctly allocated in the big object heap. - - cmp ecx, (ASM_LARGE_OBJECT_SIZE - 256)/2 - jae OversizedString - - CALL_GETTHREAD - mov r11, rax - - mov rdx, [g_pStringClass] - mov r8d, [rdx + OFFSET__MethodTable__m_BaseSize] - - ; Calculate the final size to allocate. - ; We need to calculate baseSize + cnt*2, then round that up by adding 7 and anding ~7. 
- - lea r8d, [r8d + ecx*2 + 7] - and r8d, -8 - - mov r10, [r11 + OFFSET__Thread__m_alloc_context__alloc_limit] - mov rax, [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr] - - add r8, rax - - cmp r8, r10 - ja AllocFailed - - mov [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr], r8 - mov [rax], rdx - - mov [rax + OFFSETOF__StringObject__m_StringLength], ecx - -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain -endif ; _DEBUG - - add rsp, MIN_SIZE - ret - - OversizedString: - AllocFailed: - add rsp, MIN_SIZE - jmp FramedAllocateString -NESTED_END AllocateStringFastMP, _TEXT - -; HCIMPL2(Object*, JIT_NewArr1VC_MP, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size) -NESTED_ENTRY JIT_NewArr1VC_MP, _TEXT - alloc_stack MIN_SIZE - END_PROLOGUE - - ; We were passed a (shared) method table in RCX, which contains the element type. - - ; The element count is in RDX - - ; NOTE: if this code is ported for CORINFO_HELP_NEWSFAST_ALIGN8, it will need - ; to emulate the double-specific behavior of JIT_TrialAlloc::GenAllocArray. - - ; Do a conservative check here. This is to avoid overflow while doing the calculations. We don't - ; have to worry about "large" objects, since the allocation quantum is never big enough for - ; LARGE_OBJECT_SIZE. - - ; For Value Classes, this needs to be 2^16 - slack (2^32 / max component size), - ; The slack includes the size for the array header and round-up ; for alignment. Use 256 for the - ; slack value out of laziness. - - ; In both cases we do a final overflow check after adding to the alloc_ptr. - - CALL_GETTHREAD - mov r11, rax - - cmp rdx, (65535 - 256) - jae OversizedArray - - movzx r8d, word ptr [rcx + OFFSETOF__MethodTable__m_dwFlags] ; component size is low 16 bits - imul r8d, edx ; signed mul, but won't overflow due to length restriction above - add r8d, dword ptr [rcx + OFFSET__MethodTable__m_BaseSize] - - ; round the size to a multiple of 8 - - add r8d, 7 - and r8d, -8 - - mov r10, [r11 + OFFSET__Thread__m_alloc_context__alloc_limit] - mov rax, [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr] - - add r8, rax - jc AllocFailed - - cmp r8, r10 - ja AllocFailed - - mov [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr], r8 - mov [rax], rcx - - mov dword ptr [rax + OFFSETOF__ArrayBase__m_NumComponents], edx - -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain -endif ; _DEBUG - - add rsp, MIN_SIZE - ret - - OversizedArray: - AllocFailed: - add rsp, MIN_SIZE - jmp JIT_NewArr1 -NESTED_END JIT_NewArr1VC_MP, _TEXT - - -; HCIMPL2(Object*, JIT_NewArr1OBJ_MP, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size) -NESTED_ENTRY JIT_NewArr1OBJ_MP, _TEXT - alloc_stack MIN_SIZE - END_PROLOGUE - - ; We were passed a (shared) method table in RCX, which contains the element type. - - ; The element count is in RDX - - ; NOTE: if this code is ported for CORINFO_HELP_NEWSFAST_ALIGN8, it will need - ; to emulate the double-specific behavior of JIT_TrialAlloc::GenAllocArray. - - ; Verifies that LARGE_OBJECT_SIZE fits in 32-bit. This allows us to do array size - ; arithmetic using 32-bit registers. 
- .erre ASM_LARGE_OBJECT_SIZE lt 100000000h - - cmp rdx, (ASM_LARGE_OBJECT_SIZE - 256)/8 - jae OversizedArray - - CALL_GETTHREAD - mov r11, rax - - ; In this case we know the element size is sizeof(void *), or 8 for x64 - ; This helps us in two ways - we can shift instead of multiplying, and - ; there's no need to align the size either - - mov r8d, dword ptr [rcx + OFFSET__MethodTable__m_BaseSize] - lea r8d, [r8d + edx * 8] - - ; No need for rounding in this case - element size is 8, and m_BaseSize is guaranteed - ; to be a multiple of 8. - - mov r10, [r11 + OFFSET__Thread__m_alloc_context__alloc_limit] - mov rax, [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr] - - add r8, rax - - cmp r8, r10 - ja AllocFailed - - mov [r11 + OFFSET__Thread__m_alloc_context__alloc_ptr], r8 - mov [rax], rcx - - mov dword ptr [rax + OFFSETOF__ArrayBase__m_NumComponents], edx - -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain -endif ; _DEBUG - - add rsp, MIN_SIZE - ret - - OversizedArray: - AllocFailed: - add rsp, MIN_SIZE - jmp JIT_NewArr1 -NESTED_END JIT_NewArr1OBJ_MP, _TEXT - - - extern g_global_alloc_lock:dword extern g_global_alloc_context:qword @@ -471,10 +186,6 @@ LEAF_ENTRY JIT_TrialAllocSFastSP, _TEXT mov [rax], rcx mov [g_global_alloc_lock], -1 -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain_NoScratchArea -endif ; _DEBUG - ret AllocFailed: @@ -511,10 +222,6 @@ NESTED_ENTRY JIT_BoxFastUP, _TEXT mov [rax], rcx mov [g_global_alloc_lock], -1 -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain_NoScratchArea -endif ; _DEBUG - ; Check whether the object contains pointers test dword ptr [rcx + OFFSETOF__MethodTable__m_dwFlags], MethodTable__enum_flag_ContainsPointers jnz ContainsPointers @@ -594,10 +301,6 @@ LEAF_ENTRY AllocateStringFastUP, _TEXT mov [rax + OFFSETOF__StringObject__m_StringLength], ecx -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain_NoScratchArea -endif ; _DEBUG - ret AllocFailed: @@ -655,10 +358,6 @@ LEAF_ENTRY JIT_NewArr1VC_UP, _TEXT mov dword ptr [rax + OFFSETOF__ArrayBase__m_NumComponents], edx -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain_NoScratchArea -endif ; _DEBUG - ret AllocFailed: @@ -711,10 +410,6 @@ LEAF_ENTRY JIT_NewArr1OBJ_UP, _TEXT mov dword ptr [rax + OFFSETOF__ArrayBase__m_NumComponents], edx -ifdef _DEBUG - call DEBUG_TrialAllocSetAppDomain_NoScratchArea -endif ; _DEBUG - ret AllocFailed: @@ -725,181 +420,5 @@ endif ; _DEBUG LEAF_END JIT_NewArr1OBJ_UP, _TEXT -NESTED_ENTRY JIT_GetSharedNonGCStaticBase_Slow, _TEXT - alloc_stack MIN_SIZE - END_PROLOGUE - - ; Check if rcx (moduleDomainID) is not a moduleID - test rcx, 1 - jz HaveLocalModule - - CALL_GETAPPDOMAIN - - ; Get the LocalModule - mov rax, [rax + OFFSETOF__AppDomain__m_sDomainLocalBlock + OFFSETOF__DomainLocalBlock__m_pModuleSlots] - ; rcx will always be odd, so: rcx * 4 - 4 <=> (rcx >> 1) * 8 - mov rcx, [rax + rcx * 4 - 4] - - HaveLocalModule: - ; If class is not initialized, bail to C++ helper - test [rcx + OFFSETOF__DomainLocalModule__m_pDataBlob + rdx], 1 - jz CallHelper - - mov rax, rcx - add rsp, MIN_SIZE - ret - - align 16 - CallHelper: - ; Tail call Jit_GetSharedNonGCStaticBase_Helper - add rsp, MIN_SIZE - jmp JIT_GetSharedNonGCStaticBase_Helper -NESTED_END JIT_GetSharedNonGCStaticBase_Slow, _TEXT - -NESTED_ENTRY JIT_GetSharedNonGCStaticBaseNoCtor_Slow, _TEXT - alloc_stack MIN_SIZE - END_PROLOGUE - - ; Check if rcx (moduleDomainID) is not a moduleID - test rcx, 1 - jz HaveLocalModule - - CALL_GETAPPDOMAIN - - ; Get the LocalModule - mov rax, [rax + OFFSETOF__AppDomain__m_sDomainLocalBlock + 
OFFSETOF__DomainLocalBlock__m_pModuleSlots] - ; rcx will always be odd, so: rcx * 4 - 4 <=> (rcx >> 1) * 8 - mov rax, [rax + rcx * 4 - 4] - - add rsp, MIN_SIZE - ret - - align 16 - HaveLocalModule: - mov rax, rcx - add rsp, MIN_SIZE - ret -NESTED_END JIT_GetSharedNonGCStaticBaseNoCtor_Slow, _TEXT - -NESTED_ENTRY JIT_GetSharedGCStaticBase_Slow, _TEXT - alloc_stack MIN_SIZE - END_PROLOGUE - - ; Check if rcx (moduleDomainID) is not a moduleID - test rcx, 1 - jz HaveLocalModule - - CALL_GETAPPDOMAIN - - ; Get the LocalModule - mov rax, [rax + OFFSETOF__AppDomain__m_sDomainLocalBlock + OFFSETOF__DomainLocalBlock__m_pModuleSlots] - ; rcx will always be odd, so: rcx * 4 - 4 <=> (rcx >> 1) * 8 - mov rcx, [rax + rcx * 4 - 4] - - HaveLocalModule: - ; If class is not initialized, bail to C++ helper - test [rcx + OFFSETOF__DomainLocalModule__m_pDataBlob + rdx], 1 - jz CallHelper - - mov rax, [rcx + OFFSETOF__DomainLocalModule__m_pGCStatics] - - add rsp, MIN_SIZE - ret - - align 16 - CallHelper: - ; Tail call Jit_GetSharedGCStaticBase_Helper - add rsp, MIN_SIZE - jmp JIT_GetSharedGCStaticBase_Helper -NESTED_END JIT_GetSharedGCStaticBase_Slow, _TEXT - -NESTED_ENTRY JIT_GetSharedGCStaticBaseNoCtor_Slow, _TEXT - alloc_stack MIN_SIZE - END_PROLOGUE - - ; Check if rcx (moduleDomainID) is not a moduleID - test rcx, 1 - jz HaveLocalModule - - CALL_GETAPPDOMAIN - - ; Get the LocalModule - mov rax, [rax + OFFSETOF__AppDomain__m_sDomainLocalBlock + OFFSETOF__DomainLocalBlock__m_pModuleSlots] - ; rcx will always be odd, so: rcx * 4 - 4 <=> (rcx >> 1) * 8 - mov rcx, [rax + rcx * 4 - 4] - - HaveLocalModule: - mov rax, [rcx + OFFSETOF__DomainLocalModule__m_pGCStatics] - - add rsp, MIN_SIZE - ret -NESTED_END JIT_GetSharedGCStaticBaseNoCtor_Slow, _TEXT - - -ifdef _DEBUG - -extern Object__DEBUG_SetAppDomain:proc - -; -; IN: rax: new object needing the AppDomain ID set.. -; OUT: rax, returns original value at entry -; -; all integer register state is preserved -; -DEBUG_TrialAllocSetAppDomain_STACK_SIZE equ MIN_SIZE + 10h -NESTED_ENTRY DEBUG_TrialAllocSetAppDomain, _TEXT - push_vol_reg rax - push_vol_reg rcx - push_vol_reg rdx - push_vol_reg r8 - push_vol_reg r9 - push_vol_reg r10 - push_vol_reg r11 - push_nonvol_reg rbx - alloc_stack MIN_SIZE - END_PROLOGUE - - mov rbx, rax - - ; get the app domain ptr - CALL_GETAPPDOMAIN - - ; set the sync block app domain ID - mov rcx, rbx - mov rdx, rax - call Object__DEBUG_SetAppDomain - - ; epilog - add rsp, MIN_SIZE - pop rbx - pop r11 - pop r10 - pop r9 - pop r8 - pop rdx - pop rcx - pop rax - ret -NESTED_END DEBUG_TrialAllocSetAppDomain, _TEXT - -NESTED_ENTRY DEBUG_TrialAllocSetAppDomain_NoScratchArea, _TEXT - - push_nonvol_reg rbp - set_frame rbp, 0 - END_PROLOGUE - - sub rsp, 20h - and rsp, -16 - - call DEBUG_TrialAllocSetAppDomain - - lea rsp, [rbp+0] - pop rbp - ret -NESTED_END DEBUG_TrialAllocSetAppDomain_NoScratchArea, _TEXT - -endif - - end diff --git a/src/vm/amd64/RedirectedHandledJITCase.asm b/src/vm/amd64/RedirectedHandledJITCase.asm index a6d349635747..5ab69475e284 100644 --- a/src/vm/amd64/RedirectedHandledJITCase.asm +++ b/src/vm/amd64/RedirectedHandledJITCase.asm @@ -195,9 +195,8 @@ NESTED_ENTRY RedirectForThrowControl2, _TEXT END_PROLOGUE ; Fetch rip from a CONTEXT, and store it as our return address. 
- CALL_GETTHREAD + INLINE_GETTHREAD rcx - mov rcx, rax call Thread__GetAbortContext mov rax, [rax + OFFSETOF__CONTEXT__Rip] diff --git a/src/vm/amd64/TlsGetters.asm b/src/vm/amd64/TlsGetters.asm deleted file mode 100644 index 7b5a30844b57..000000000000 --- a/src/vm/amd64/TlsGetters.asm +++ /dev/null @@ -1,120 +0,0 @@ -; Licensed to the .NET Foundation under one or more agreements. -; The .NET Foundation licenses this file to you under the MIT license. -; See the LICENSE file in the project root for more information. - -; ==++== -; - -; -; ==--== -; *********************************************************************** -; File: TlsGetters.asm, see history in jithelp.asm -; -; Notes: These TlsGetters (GetAppDomain(), GetThread()) are implemented -; in a generic fashion, but might be patched at runtime to contain -; a much faster implementation which goes straight to the TLS for -; the Thread* or AppDomain*. -; -; Note that the macro takes special care to not have these become -; non-unwindable after the patching has overwritten the prologue of -; the generic getter. -; *********************************************************************** - -include AsmMacros.inc -include asmconstants.inc - -; Min amount of stack space that a nested function should allocate. -MIN_SIZE equ 28h - - -; These generic TLS getters are used for GetThread() and GetAppDomain(), they do a little -; extra work to ensure that certain registers are preserved, those include the following -; volatile registers -; -; rcx -; rdx -; r8 -; r9 -; r10 -; r11 -; -; The return value is in rax as usual -; -; They DO NOT save scratch flowing point registers, if you need those you need to save them. - -ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK -GetThreadGenericFullCheck equ ?GetThreadGenericFullCheck@@YAPEAVThread@@XZ -extern GetThreadGenericFullCheck:proc -endif ; ENABLE_GET_THREAD_GENERIC_FULL_CHECK - -; Creates a generic TLS getter using the value from TLS slot gTLSIndex. Set GenerateGetThread -; when using this macro to generate GetThread, as that will cause special code to be generated which -; enables additional debug-only checking, such as enforcement of EE_THREAD_NOT_REQUIRED contracts -GenerateOptimizedTLSGetter macro name, GenerateGetThread - -extern g&name&TLSIndex:dword -extern __imp_TlsGetValue:qword - -SIZEOF_PUSHED_ARGS equ 10h - -NESTED_ENTRY Get&name&Generic, _TEXT - push_vol_reg r10 - push_vol_reg r11 - alloc_stack MIN_SIZE - - ; save argument registers in shadow space - save_reg_postrsp rcx, MIN_SIZE + 8h + SIZEOF_PUSHED_ARGS - save_reg_postrsp rdx, MIN_SIZE + 10h + SIZEOF_PUSHED_ARGS - save_reg_postrsp r8, MIN_SIZE + 18h + SIZEOF_PUSHED_ARGS - save_reg_postrsp r9, MIN_SIZE + 20h + SIZEOF_PUSHED_ARGS - END_PROLOGUE - -ifdef _DEBUG - cmp dword ptr [g&name&TLSIndex], -1 - jnz @F - int 3 -@@: -endif ; _DEBUG - -CALL_GET_THREAD_GENERIC_FULL_CHECK=0 - -ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK -if GenerateGetThread - -; Generating the GetThread() tlsgetter, and GetThreadGenericFullCheck is -; defined in C (in threads.cpp). 
So we'll want to delegate directly to -; GetThreadGenericFullCheck, which may choose to do additional checking, such -; as enforcing EE_THREAD_NOT_REQUIRED contracts -CALL_GET_THREAD_GENERIC_FULL_CHECK=1 - -endif ; GenerateGetThread -endif ; ENABLE_GET_THREAD_GENERIC_FULL_CHECK - -if CALL_GET_THREAD_GENERIC_FULL_CHECK - call GetThreadGenericFullCheck -else - ; Not generating the GetThread() tlsgetter (or there is no GetThreadGenericFullCheck - ; to call), so do nothing special--just look up the value stored at TLS slot gTLSIndex - mov ecx, [g&name&TLSIndex] - call [__imp_TlsGetValue] -endif - - ; restore arguments from shadow space - mov rcx, [rsp + MIN_SIZE + 8h + SIZEOF_PUSHED_ARGS] - mov rdx, [rsp + MIN_SIZE + 10h + SIZEOF_PUSHED_ARGS] - mov r8, [rsp + MIN_SIZE + 18h + SIZEOF_PUSHED_ARGS] - mov r9, [rsp + MIN_SIZE + 20h + SIZEOF_PUSHED_ARGS] - - ; epilog - add rsp, MIN_SIZE - pop r11 - pop r10 - ret -NESTED_END Get&name&Generic, _TEXT - - endm - -GenerateOptimizedTLSGetter Thread, 1 -GenerateOptimizedTLSGetter AppDomain, 0 - - end diff --git a/src/vm/amd64/UMThunkStub.asm b/src/vm/amd64/UMThunkStub.asm index dd6d4ebc3c50..8b44e67a7e3a 100644 --- a/src/vm/amd64/UMThunkStub.asm +++ b/src/vm/amd64/UMThunkStub.asm @@ -166,14 +166,12 @@ UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE - (U ; ; Call GetThread() ; - CALL_GETTHREAD ; will not trash r10 - test rax, rax + INLINE_GETTHREAD r12 ; will not trash r10 + test r12, r12 jz DoThreadSetup HaveThread: - mov r12, rax ; r12 <- Thread* - ;FailFast if a native callable method invoked via ldftn and calli. cmp dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1 jz InvalidTransition @@ -250,6 +248,8 @@ DoThreadSetup: movdqa xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h] movdqa xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h] movdqa xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h] + + mov r12, rax jmp HaveThread diff --git a/src/vm/amd64/asmconstants.h b/src/vm/amd64/asmconstants.h index 1fef80f66ddc..b6a3c712e747 100644 --- a/src/vm/amd64/asmconstants.h +++ b/src/vm/amd64/asmconstants.h @@ -535,22 +535,9 @@ ASMCONSTANTS_C_ASSERT(MethodDescClassification__mdcClassification == mdcClassifi ASMCONSTANTS_C_ASSERT(MethodDescClassification__mcInstantiated == mcInstantiated); #ifndef FEATURE_PAL - -#define OFFSET__TEB__TlsSlots 0x1480 -ASMCONSTANTS_C_ASSERT(OFFSET__TEB__TlsSlots == offsetof(TEB, TlsSlots)); - -#define OFFSETOF__TEB__LastErrorValue 0x68 -ASMCONSTANTS_C_ASSERT(OFFSETOF__TEB__LastErrorValue == offsetof(TEB, LastErrorValue)); - -#endif // !FEATURE_PAL - -#ifdef _DEBUG -#define TLS_GETTER_MAX_SIZE_ASM 0x30 -#else -#define TLS_GETTER_MAX_SIZE_ASM 0x18 +#define OFFSET__TEB__ThreadLocalStoragePointer 0x58 +ASMCONSTANTS_C_ASSERT(OFFSET__TEB__ThreadLocalStoragePointer == offsetof(TEB, ThreadLocalStoragePointer)); #endif -ASMCONSTANTS_C_ASSERT(TLS_GETTER_MAX_SIZE_ASM == TLS_GETTER_MAX_SIZE) - // If you change these constants, you need to update code in // RedirectHandledJITCase.asm and ExcepAMD64.cpp. 
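
For reference, not as part of the patch itself: a minimal C++ sketch of the lookup that the new inlined thread getters encode. It assumes, as the emitter changes further below do, that g_TlsIndex packs the module's TLS index in its low 16 bits and the byte offset of the runtime's thread-local data in bits 16 to 30. The function name GetThreadFromTeb and the pTeb parameter are hypothetical stand-ins; the real helpers read the TEB directly (gs-relative on amd64, an mrc coprocessor read on arm).

    #include <cstdint>

    extern uint32_t g_TlsIndex;   // low 16 bits: TLS module index; bits 16-30: offset within the module's TLS block

    void* GetThreadFromTeb(void* pTeb)
    {
        // TEB + 0x58 is ThreadLocalStoragePointer on amd64 (OFFSET__TEB__ThreadLocalStoragePointer above)
        void** tlsArray = *(void***)((uint8_t*)pTeb + 0x58);

        // Select this module's TLS block via the module's TLS index
        uint8_t* moduleTlsBlock = (uint8_t*)tlsArray[g_TlsIndex & 0xFFFF];

        // The current Thread* is stored at a fixed offset inside that block
        return *(void**)(moduleTlsBlock + ((g_TlsIndex & 0x7FFF0000) >> 16));
    }
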
diff --git a/src/vm/amd64/cgencpu.h b/src/vm/amd64/cgencpu.h index 98e9770858b5..b83437039bac 100644 --- a/src/vm/amd64/cgencpu.h +++ b/src/vm/amd64/cgencpu.h @@ -104,13 +104,6 @@ EXTERN_C void FastCallFinalizeWorker(Object *obj, PCODE funcPtr); #define X86RegFromAMD64Reg(extended_reg) \ ((X86Reg)(((int)extended_reg) & X86_REGISTER_MASK)) -// Max size of optimized TLS helpers -#ifdef _DEBUG -// Debug build needs extra space for last error trashing -#define TLS_GETTER_MAX_SIZE 0x30 -#else -#define TLS_GETTER_MAX_SIZE 0x18 -#endif //======================================================================= // IMPORTANT: This value is used to figure out how much to allocate @@ -538,26 +531,22 @@ inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode) return TRUE; } -#ifndef FEATURE_IMPLICIT_TLS // // JIT HELPER ALIASING FOR PORTABILITY. // // Create alias for optimized implementations of helpers provided on this platform // -#define JIT_GetSharedGCStaticBase JIT_GetSharedGCStaticBase_InlineGetAppDomain -#define JIT_GetSharedNonGCStaticBase JIT_GetSharedNonGCStaticBase_InlineGetAppDomain -#define JIT_GetSharedGCStaticBaseNoCtor JIT_GetSharedGCStaticBaseNoCtor_InlineGetAppDomain -#define JIT_GetSharedNonGCStaticBaseNoCtor JIT_GetSharedNonGCStaticBaseNoCtor_InlineGetAppDomain -#endif // FEATURE_IMPLICIT_TLS +#define JIT_GetSharedGCStaticBase JIT_GetSharedGCStaticBase_SingleAppDomain +#define JIT_GetSharedNonGCStaticBase JIT_GetSharedNonGCStaticBase_SingleAppDomain +#define JIT_GetSharedGCStaticBaseNoCtor JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain +#define JIT_GetSharedNonGCStaticBaseNoCtor JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain #ifndef FEATURE_PAL - #define JIT_ChkCastClass JIT_ChkCastClass #define JIT_ChkCastClassSpecial JIT_ChkCastClassSpecial #define JIT_IsInstanceOfClass JIT_IsInstanceOfClass #define JIT_ChkCastInterface JIT_ChkCastInterface #define JIT_IsInstanceOfInterface JIT_IsInstanceOfInterface - #endif // FEATURE_PAL #define JIT_Stelem_Ref JIT_Stelem_Ref diff --git a/src/vm/appdomain.cpp b/src/vm/appdomain.cpp index 7b0da7f5a221..edcc3df1d716 100644 --- a/src/vm/appdomain.cpp +++ b/src/vm/appdomain.cpp @@ -2406,10 +2406,6 @@ void SystemDomain::Init() _ASSERTE(curCtx->GetDomain() != NULL); #endif -#ifdef _DEBUG - g_fVerifierOff = g_pConfig->IsVerifierOff(); -#endif - #ifdef FEATURE_PREJIT if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_ZapDisable) != 0) g_fAllowNativeImages = false; diff --git a/src/vm/arm/asmconstants.h b/src/vm/arm/asmconstants.h index 704fa28556e8..ab3d16b9a9f4 100644 --- a/src/vm/arm/asmconstants.h +++ b/src/vm/arm/asmconstants.h @@ -256,9 +256,6 @@ ASMCONSTANTS_C_ASSERT(VASigCookie__pNDirectILStub == offsetof(VASigCookie, pNDir #define CONTEXT_Pc 0x040 ASMCONSTANTS_C_ASSERT(CONTEXT_Pc == offsetof(T_CONTEXT,Pc)) -#define TLS_GETTER_MAX_SIZE_ASM 0x10 -ASMCONSTANTS_C_ASSERT(TLS_GETTER_MAX_SIZE_ASM == TLS_GETTER_MAX_SIZE) - #define CallDescrData__pSrc 0x00 #define CallDescrData__numStackSlots 0x04 #define CallDescrData__pArgumentRegisters 0x08 diff --git a/src/vm/arm/asmhelpers.asm b/src/vm/arm/asmhelpers.asm index e5fd41a513b9..4d21d074628b 100644 --- a/src/vm/arm/asmhelpers.asm +++ b/src/vm/arm/asmhelpers.asm @@ -1481,7 +1481,6 @@ stackProbe_loop NESTED_END -#ifdef FEATURE_CORECLR ; ; JIT Static access helpers for single appdomain case ; @@ -1543,7 +1542,6 @@ CallCppHelper3 bx lr LEAF_END -#endif ; ------------------------------------------------------------------ ; __declspec(naked) void F_CALL_CONV JIT_Stelem_Ref(PtrArray* array, 
unsigned idx, Object* val) diff --git a/src/vm/arm/cgencpu.h b/src/vm/arm/cgencpu.h index 2a369d8f029c..8da2b2b3ccbe 100644 --- a/src/vm/arm/cgencpu.h +++ b/src/vm/arm/cgencpu.h @@ -83,9 +83,6 @@ EXTERN_C void setFPReturn(int fpSize, INT64 retVal); #define CALLDESCR_ARGREGS 1 // CallDescrWorker has ArgumentRegister parameter #define CALLDESCR_FPARGREGS 1 // CallDescrWorker has FloatArgumentRegisters parameter -// Max size of optimized TLS helpers -#define TLS_GETTER_MAX_SIZE 0x10 - // Given a return address retrieved during stackwalk, // this is the offset by which it should be decremented to arrive at the callsite. #define STACKWALK_CONTROLPC_ADJUST_OFFSET 2 @@ -552,7 +549,7 @@ class StubLinkerCPU : public StubLinker ThumbEmitJumpRegister(thumbRegLr); } - void ThumbEmitGetThread(TLSACCESSMODE mode, ThumbReg dest); + void ThumbEmitGetThread(ThumbReg dest); void ThumbEmitNop() { @@ -1056,19 +1053,15 @@ inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode) #endif } -#ifndef FEATURE_IMPLICIT_TLS // // JIT HELPER ALIASING FOR PORTABILITY. // // Create alias for optimized implementations of helpers provided on this platform // -// optimized static helpers -#define JIT_GetSharedGCStaticBase JIT_GetSharedGCStaticBase_InlineGetAppDomain -#define JIT_GetSharedNonGCStaticBase JIT_GetSharedNonGCStaticBase_InlineGetAppDomain -#define JIT_GetSharedGCStaticBaseNoCtor JIT_GetSharedGCStaticBaseNoCtor_InlineGetAppDomain -#define JIT_GetSharedNonGCStaticBaseNoCtor JIT_GetSharedNonGCStaticBaseNoCtor_InlineGetAppDomain - -#endif +#define JIT_GetSharedGCStaticBase JIT_GetSharedGCStaticBase_SingleAppDomain +#define JIT_GetSharedNonGCStaticBase JIT_GetSharedNonGCStaticBase_SingleAppDomain +#define JIT_GetSharedGCStaticBaseNoCtor JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain +#define JIT_GetSharedNonGCStaticBaseNoCtor JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain #ifndef FEATURE_PAL #define JIT_Stelem_Ref JIT_Stelem_Ref diff --git a/src/vm/arm/patchedcode.S b/src/vm/arm/patchedcode.S index 9335fe65fcdb..43adca25c82b 100644 --- a/src/vm/arm/patchedcode.S +++ b/src/vm/arm/patchedcode.S @@ -19,22 +19,6 @@ bx lr LEAF_END JIT_PatchedCodeStart, _TEXT -// ------------------------------------------------------------------ -// Optimized TLS getters - - LEAF_ENTRY GetTLSDummy, _TEXT - mov r0, #0 - bx lr - LEAF_END GetTLSDummy, _TEXT - - .align 4 - LEAF_ENTRY ClrFlsGetBlock, _TEXT - // This will be overwritten at runtime with optimized ClrFlsGetBlock implementation - b C_FUNC(GetTLSDummy) - // Just allocate space that will be filled in at runtime - .space (TLS_GETTER_MAX_SIZE_ASM - 2) - LEAF_END ClrFlsGetBlock, _TEXT - // ------------------------------------------------------------------ // GC write barrier support. 
// diff --git a/src/vm/arm/patchedcode.asm b/src/vm/arm/patchedcode.asm index 9fdd60961d81..c7e2322e38da 100644 --- a/src/vm/arm/patchedcode.asm +++ b/src/vm/arm/patchedcode.asm @@ -13,52 +13,6 @@ #include "asmmacros.h" - SETALIAS JIT_Box,?JIT_Box@@YAPAVObject@@PAUCORINFO_CLASS_STRUCT_@@PAX@Z - SETALIAS JIT_New, ?JIT_New@@YAPAVObject@@PAUCORINFO_CLASS_STRUCT_@@@Z - SETALIAS JIT_Box, ?JIT_Box@@YAPAVObject@@PAUCORINFO_CLASS_STRUCT_@@PAX@Z - SETALIAS FramedAllocateString, ?FramedAllocateString@@YAPAVStringObject@@K@Z - SETALIAS g_pStringClass, ?g_pStringClass@@3PAVMethodTable@@A - SETALIAS JIT_NewArr1, ?JIT_NewArr1@@YAPAVObject@@PAUCORINFO_CLASS_STRUCT_@@H@Z - SETALIAS CopyValueClassUnchecked, ?CopyValueClassUnchecked@@YAXPAX0PAVMethodTable@@@Z - - IMPORT $JIT_New - IMPORT $JIT_Box - IMPORT $FramedAllocateString - IMPORT $g_pStringClass - IMPORT $JIT_NewArr1 - IMPORT $CopyValueClassUnchecked - IMPORT SetAppDomainInObject - - - IMPORT JIT_GetSharedNonGCStaticBase_Helper - IMPORT JIT_GetSharedGCStaticBase_Helper - - - EXPORT JIT_TrialAllocSFastMP_InlineGetThread__PatchTLSOffset - EXPORT JIT_BoxFastMP_InlineGetThread__PatchTLSOffset - EXPORT AllocateStringFastMP_InlineGetThread__PatchTLSOffset - EXPORT JIT_NewArr1VC_MP_InlineGetThread__PatchTLSOffset - EXPORT JIT_NewArr1OBJ_MP_InlineGetThread__PatchTLSOffset - - EXPORT JIT_GetSharedNonGCStaticBase__PatchTLSLabel - EXPORT JIT_GetSharedNonGCStaticBaseNoCtor__PatchTLSLabel - EXPORT JIT_GetSharedGCStaticBase__PatchTLSLabel - EXPORT JIT_GetSharedGCStaticBaseNoCtor__PatchTLSLabel - - MACRO - PATCHABLE_INLINE_GETTHREAD $reg, $label -$label - mrc p15, 0, $reg, c13, c0, 2 - ldr $reg, [$reg, #0xe10] - MEND - - - MACRO - PATCHABLE_INLINE_GETAPPDOMAIN $reg, $label -$label - mrc p15, 0, $reg, c13, c0, 2 - ldr $reg, [$reg, #0xe10] - MEND TEXTAREA @@ -69,38 +23,6 @@ $label bx lr LEAF_END -; ------------------------------------------------------------------ -; Optimized TLS getters - - ALIGN 4 - LEAF_ENTRY GetThread - ; This will be overwritten at runtime with optimized GetThread implementation - b GetTLSDummy - ; Just allocate space that will be filled in at runtime - SPACE (TLS_GETTER_MAX_SIZE_ASM - 2) - LEAF_END - - ALIGN 4 - LEAF_ENTRY GetAppDomain - ; This will be overwritten at runtime with optimized GetThread implementation - b GetTLSDummy - ; Just allocate space that will be filled in at runtime - SPACE (TLS_GETTER_MAX_SIZE_ASM - 2) - LEAF_END - - LEAF_ENTRY GetTLSDummy - mov r0, #0 - bx lr - LEAF_END - - ALIGN 4 - LEAF_ENTRY ClrFlsGetBlock - ; This will be overwritten at runtime with optimized ClrFlsGetBlock implementation - b GetTLSDummy - ; Just allocate space that will be filled in at runtime - SPACE (TLS_GETTER_MAX_SIZE_ASM - 2) - LEAF_END - ; ------------------------------------------------------------------ ; GC write barrier support. ; @@ -134,443 +56,6 @@ $label bx lr LEAF_END -; JIT Allocation helpers when TLS Index for Thread is low enough for fast helpers - -;--------------------------------------------------------------------------- -; IN: r0: MethodTable* -;; OUT: r0: new object - - LEAF_ENTRY JIT_TrialAllocSFastMP_InlineGetThread - - ;get object size - ldr r1, [r0, #MethodTable__m_BaseSize] - - ; m_BaseSize is guaranteed to be a multiple of 4. 
- - ;getThread - PATCHABLE_INLINE_GETTHREAD r12, JIT_TrialAllocSFastMP_InlineGetThread__PatchTLSOffset - - ;load current allocation pointers - ldr r2, [r12, #Thread__m_alloc_context__alloc_limit] - ldr r3, [r12, #Thread__m_alloc_context__alloc_ptr] - - ;add object size to current pointer - add r1, r3 - - ;if beyond the limit call c++ method - cmp r1, r2 - bhi AllocFailed - - ;r1 is the new alloc_ptr and r3 has object address - ;update the alloc_ptr in Thread - str r1, [r12, #Thread__m_alloc_context__alloc_ptr] - - ;write methodTable in object - str r0, [r3] - - ;return object in r0 - mov r0, r3 - -#ifdef _DEBUG - ; Tail call to a helper that will set the current AppDomain index into the object header and then - ; return the object pointer back to our original caller. - b SetAppDomainInObject -#else - ;return - bx lr -#endif - -AllocFailed - b $JIT_New - LEAF_END - - -;--------------------------------------------------------------------------- -; HCIMPL2(Object*, JIT_Box, CORINFO_CLASS_HANDLE type, void* unboxedData) -; IN: r0: MethodTable* -; IN: r1: data pointer -;; OUT: r0: new object - - LEAF_ENTRY JIT_BoxFastMP_InlineGetThread - - ldr r2, [r0, #MethodTable__m_pWriteableData] - - ;Check whether the class has been initialized - ldr r2, [r2, #MethodTableWriteableData__m_dwFlags] - cmp r2, #MethodTableWriteableData__enum_flag_Unrestored - bne ClassNotInited - - ; Check whether the object contains pointers - ldr r3, [r0, #MethodTable__m_dwFlags] - cmp r3, #MethodTable__enum_flag_ContainsPointers - bne ContainsPointers - - ldr r2, [r0, #MethodTable__m_BaseSize] - - ;m_BaseSize is guranteed to be a multiple of 4 - - ;GetThread - PATCHABLE_INLINE_GETTHREAD r12, JIT_BoxFastMP_InlineGetThread__PatchTLSOffset - - ldr r3, [r12, #Thread__m_alloc_context__alloc_ptr] - add r3, r2 - - ldr r2, [r12, #Thread__m_alloc_context__alloc_limit] - - cmp r3, r2 - bhi AllocFailed2 - - ldr r2, [r12, #Thread__m_alloc_context__alloc_ptr] - - ;advance alloc_ptr in Thread - str r3, [r12, #Thread__m_alloc_context__alloc_ptr] - - ;write methodtable* in the object - str r0, [r2] - - ;copy the contents of value type in the object - - ldr r3, [r0, #MethodTable__m_BaseSize] - sub r3, #0xc - - ;r3 = no of bytes to copy - - ;move address of object to return register - mov r0, r2 - - ;advance r2 to skip methodtable location - add r2, #4 - -CopyLoop - ldr r12, [r1, r3] - str r12, [r2, r3] - sub r3, #4 - bne CopyLoop - -#ifdef _DEBUG - ; Tail call to a helper that will set the current AppDomain index into the object header and then - ; return the object pointer back to our original caller. - b SetAppDomainInObject -#else - ;return - bx lr -#endif - -ContainsPointers -ClassNotInited -AllocFailed2 - b $JIT_Box - LEAF_END - - -;--------------------------------------------------------------------------- -; IN: r0: number of characters to allocate -;; OUT: r0: address of newly allocated string - - LEAF_ENTRY AllocateStringFastMP_InlineGetThread - - ; Instead of doing elaborate overflow checks, we just limit the number of elements to - ; MAX_FAST_ALLOCATE_STRING_SIZE. This is picked (in asmconstants.h) to avoid any possibility of - ; overflow and to ensure we never try to allocate anything here that really should go on the large - ; object heap instead. Additionally the size has been selected so that it will encode into an - ; immediate in a single cmp instruction. - - cmp r0, #MAX_FAST_ALLOCATE_STRING_SIZE - bhs OversizedString - - ; Calculate total string size: Align(base size + (characters * 2), 4). 
- mov r1, #(SIZEOF__BaseStringObject + 3) ; r1 == string base size + 3 for alignment round up - add r1, r1, r0, lsl #1 ; r1 += characters * 2 - bic r1, r1, #3 ; r1 &= ~3; round size to multiple of 4 - - ;GetThread - PATCHABLE_INLINE_GETTHREAD r12, AllocateStringFastMP_InlineGetThread__PatchTLSOffset - ldr r2, [r12, #Thread__m_alloc_context__alloc_limit] - ldr r3, [r12, #Thread__m_alloc_context__alloc_ptr] - - add r1, r3 - cmp r1, r2 - bhi AllocFailed3 - - ;can allocate - - ;advance alloc_ptr - str r1, [r12, #Thread__m_alloc_context__alloc_ptr] - - ; Write MethodTable pointer into new object. - ldr r1, =$g_pStringClass - ldr r1, [r1] - str r1, [r3] - - ; Write string length into new object. - str r0, [r3, #StringObject__m_StringLength] - - ;prepare to return new object address - mov r0, r3 - -#ifdef _DEBUG - ; Tail call to a helper that will set the current AppDomain index into the object header and then - ; return the object pointer back to our original caller. - b SetAppDomainInObject -#else - ;return - bx lr -#endif - - -OversizedString -AllocFailed3 - b $FramedAllocateString - - LEAF_END - - -; HCIMPL2(Object*, JIT_NewArr1VC_MP_InlineGetThread, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size) -;--------------------------------------------------------------------------- -; IN: r0: a (shared) array method table, which contains the element type. -; IN: r1: number of array elements -;; OUT: r0: address of newly allocated object - - LEAF_ENTRY JIT_NewArr1VC_MP_InlineGetThread - - ; Do a conservative check here for number of elements. - ; This is to avoid overflow while doing the calculations. We don't - ; have to worry about "large" objects, since the allocation quantum is never big enough for - ; LARGE_OBJECT_SIZE. - - ; For Value Classes, this needs to be < (max_value_in_4byte - size_of_base_array)/(max_size_of_each_element) - ; This evaluates to (2^32-1 - 0xc)/2^16 - - ; Additionally the constant has been chosen such that it can be encoded in a - ; single Thumb2 CMP instruction. - - cmp r1, #MAX_FAST_ALLOCATE_ARRAY_VC_SIZE - bhs OverSizedArray3 - - ;get element size - stored in low 16bits of m_dwFlags - ldrh r12, [r0, #MethodTable__m_dwFlags] - - ; getting size of object to allocate - - ; multiply number of elements with size of each element - mul r2, r12, r1 - - ; add the base array size and 3 to align total bytes at 4 byte boundary - add r2, r2, #SIZEOF__ArrayOfValueType + 3 - bic r2, #3 - - ;GetThread - PATCHABLE_INLINE_GETTHREAD r12, JIT_NewArr1VC_MP_InlineGetThread__PatchTLSOffset - ldr r3, [r12, #Thread__m_alloc_context__alloc_ptr] - - add r3, r2 - - ldr r2, [r12, #Thread__m_alloc_context__alloc_limit] - - cmp r3, r2 - bhi AllocFailed6 - - ; can allocate - - ;r2 = address of new object - ldr r2, [r12, #Thread__m_alloc_context__alloc_ptr] - - ;update pointer in allocation context - str r3, [r12, #Thread__m_alloc_context__alloc_ptr] - - ;store number of elements - str r1, [r2, #ArrayBase__m_NumComponents] - - ;store methodtable - str r0, [r2] - - ;copy return value - mov r0, r2 - -#ifdef _DEBUG - ; Tail call to a helper that will set the current AppDomain index into the object header and then - ; return the object pointer back to our original caller. 
- b SetAppDomainInObject -#else - ;return - bx lr -#endif - - - -AllocFailed6 -OverSizedArray3 - b $JIT_NewArr1 - - LEAF_END - - - -; HCIMPL2(Object*, JIT_NewArr1OBJ_MP_InlineGetThread, CORINFO_CLASS_HANDLE arrayMT, INT_PTR size) -;--------------------------------------------------------------------------- -; IN: r0: a (shared) array method table, which contains the element type. -; IN: r1: number of array elements -;; OUT: r0: address of newly allocated object - - LEAF_ENTRY JIT_NewArr1OBJ_MP_InlineGetThread - - cmp r1, #MAX_FAST_ALLOCATE_ARRAY_OBJECTREF_SIZE - bhs OverSizedArray - - mov r2, #SIZEOF__ArrayOfObjectRef - add r2, r2, r1, lsl #2 - - ;r2 will be a multiple of 4 - - - ;GetThread - PATCHABLE_INLINE_GETTHREAD r12, JIT_NewArr1OBJ_MP_InlineGetThread__PatchTLSOffset - ldr r3, [r12, #Thread__m_alloc_context__alloc_ptr] - - add r3, r2 - - ldr r2, [r12, #Thread__m_alloc_context__alloc_limit] - - cmp r3, r2 - bhi AllocFailed4 - - ;can allocate - - ;r2 = address of new object - ldr r2, [r12, #Thread__m_alloc_context__alloc_ptr] - - ;update pointer in allocation context - str r3, [r12, #Thread__m_alloc_context__alloc_ptr] - - ;store number of elements - str r1, [r2, #ArrayBase__m_NumComponents] - - ;store methodtable - str r0, [r2] - - ;copy return value - mov r0, r2 - -#ifdef _DEBUG - ; Tail call to a helper that will set the current AppDomain index into the object header and then - ; return the object pointer back to our original caller. - b SetAppDomainInObject -#else - ;return - bx lr -#endif - -OverSizedArray -AllocFailed4 - b $JIT_NewArr1 - LEAF_END - -; -; JIT Static access helpers when TLS Index for AppDomain is low enough for fast helpers -; - -; ------------------------------------------------------------------ -; void* JIT_GetSharedNonGCStaticBase(SIZE_T moduleDomainID, DWORD dwClassDomainID) - - LEAF_ENTRY JIT_GetSharedNonGCStaticBase_InlineGetAppDomain - ; Check if r0 (moduleDomainID) is not a moduleID - tst r0, #1 - beq HaveLocalModule1 - - PATCHABLE_INLINE_GETAPPDOMAIN r2, JIT_GetSharedNonGCStaticBase__PatchTLSLabel - - ; Get the LocalModule, r0 will always be odd, so: r0 * 2 - 2 <=> (r0 >> 1) * 4 - ldr r2, [r2 , #AppDomain__m_sDomainLocalBlock + DomainLocalBlock__m_pModuleSlots] - add r2, r2, r0, LSL #1 - ldr r0, [r2, #-2] - -HaveLocalModule1 - ; If class is not initialized, bail to C++ helper - add r2, r0, #DomainLocalModule__m_pDataBlob - ldrb r2, [r2, r1] - tst r2, #1 - beq CallHelper1 - - bx lr - -CallHelper1 - ; Tail call JIT_GetSharedNonGCStaticBase_Helper - b JIT_GetSharedNonGCStaticBase_Helper - LEAF_END - - -; ------------------------------------------------------------------ -; void* JIT_GetSharedNonGCStaticBaseNoCtor(SIZE_T moduleDomainID, DWORD dwClassDomainID) - - LEAF_ENTRY JIT_GetSharedNonGCStaticBaseNoCtor_InlineGetAppDomain - ; Check if r0 (moduleDomainID) is not a moduleID - tst r0, #1 - beq HaveLocalModule2 - - PATCHABLE_INLINE_GETAPPDOMAIN r2, JIT_GetSharedNonGCStaticBaseNoCtor__PatchTLSLabel - - ; Get the LocalModule, r0 will always be odd, so: r0 * 2 - 2 <=> (r0 >> 1) * 4 - ldr r2, [r2 , #AppDomain__m_sDomainLocalBlock + DomainLocalBlock__m_pModuleSlots] - add r2, r2, r0, LSL #1 - ldr r0, [r2, #-2] - - -HaveLocalModule2 - bx lr - LEAF_END - - -; ------------------------------------------------------------------ -; void* JIT_GetSharedGCStaticBase(SIZE_T moduleDomainID, DWORD dwClassDomainID) - - LEAF_ENTRY JIT_GetSharedGCStaticBase_InlineGetAppDomain - ; Check if r0 (moduleDomainID) is not a moduleID - tst r0, #1 - beq HaveLocalModule3 - - 
PATCHABLE_INLINE_GETAPPDOMAIN r2, JIT_GetSharedGCStaticBase__PatchTLSLabel - - ; Get the LocalModule, r0 will always be odd, so: r0 * 2 - 2 <=> (r0 >> 1) * 4 - ldr r2, [r2 , #AppDomain__m_sDomainLocalBlock + DomainLocalBlock__m_pModuleSlots] - add r2, r2, r0, LSL #1 - ldr r0, [r2, #-2] - -HaveLocalModule3 - ; If class is not initialized, bail to C++ helper - add r2, r0, #DomainLocalModule__m_pDataBlob - ldrb r2, [r2, r1] - tst r2, #1 - beq CallHelper3 - - ldr r0, [r0, #DomainLocalModule__m_pGCStatics] - bx lr - -CallHelper3 - ; Tail call Jit_GetSharedGCStaticBase_Helper - b JIT_GetSharedGCStaticBase_Helper - LEAF_END - - -; ------------------------------------------------------------------ -; void* JIT_GetSharedGCStaticBaseNoCtor(SIZE_T moduleDomainID, DWORD dwClassDomainID) - - LEAF_ENTRY JIT_GetSharedGCStaticBaseNoCtor_InlineGetAppDomain - ; Check if r0 (moduleDomainID) is not a moduleID - tst r0, #1 - beq HaveLocalModule4 - - PATCHABLE_INLINE_GETAPPDOMAIN r2, JIT_GetSharedGCStaticBaseNoCtor__PatchTLSLabel - - ; Get the LocalModule, r0 will always be odd, so: r0 * 2 - 2 <=> (r0 >> 1) * 4 - ldr r2, [r2 , #AppDomain__m_sDomainLocalBlock + DomainLocalBlock__m_pModuleSlots] - add r2, r2, r0, LSL #1 - ldr r0, [r2, #-2] - -HaveLocalModule4 - ldr r0, [r0, #DomainLocalModule__m_pGCStatics] - bx lr - LEAF_END - ; ------------------------------------------------------------------ ; End of the writeable code region LEAF_ENTRY JIT_PatchedCodeLast diff --git a/src/vm/arm/stubs.cpp b/src/vm/arm/stubs.cpp index 6d9b3d0dca68..e0574762b1b8 100644 --- a/src/vm/arm/stubs.cpp +++ b/src/vm/arm/stubs.cpp @@ -1403,30 +1403,21 @@ Stub *GenerateInitPInvokeFrameHelper() ThumbReg regThread = ThumbReg(5); ThumbReg regScratch = ThumbReg(6); -#ifdef FEATURE_IMPLICIT_TLS - TLSACCESSMODE mode = TLSACCESS_GENERIC; -#else - TLSACCESSMODE mode = GetTLSAccessMode(GetThreadTLSIndex()); -#endif +#ifdef FEATURE_PAL + // Erect frame to perform call to GetThread + psl->ThumbEmitProlog(1, sizeof(ArgumentRegisters), FALSE); // Save r4 for aligned stack + // Save argument registers around the GetThread call. Don't bother with using ldm/stm since this inefficient path anyway. + for (int reg = 0; reg < 4; reg++) + psl->ThumbEmitStoreRegIndirect(ThumbReg(reg), thumbRegSp, offsetof(ArgumentRegisters, r[reg])); +#endif - if (mode == TLSACCESS_GENERIC) - { - // Erect frame to perform call to GetThread - psl->ThumbEmitProlog(1, sizeof(ArgumentRegisters), FALSE); // Save r4 for aligned stack - - // Save argument registers around the GetThread call. Don't bother with using ldm/stm since this inefficient path anyway. 
- for (int reg = 0; reg < 4; reg++) - psl->ThumbEmitStoreRegIndirect(ThumbReg(reg), thumbRegSp, offsetof(ArgumentRegisters, r[reg])); - } - - psl->ThumbEmitGetThread(mode, regThread); + psl->ThumbEmitGetThread(regThread); - if (mode == TLSACCESS_GENERIC) - { - for (int reg = 0; reg < 4; reg++) - psl->ThumbEmitLoadRegIndirect(ThumbReg(reg), thumbRegSp, offsetof(ArgumentRegisters, r[reg])); - } +#ifdef FEATURE_PAL + for (int reg = 0; reg < 4; reg++) + psl->ThumbEmitLoadRegIndirect(ThumbReg(reg), thumbRegSp, offsetof(ArgumentRegisters, r[reg])); +#endif // mov [regFrame + FrameInfo.offsetOfGSCookie], GetProcessGSCookie() psl->ThumbEmitMovConstant(regScratch, GetProcessGSCookie()); @@ -1448,82 +1439,36 @@ Stub *GenerateInitPInvokeFrameHelper() psl->ThumbEmitMovConstant(regScratch, 0); psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfReturnAddress - negSpace); - if (mode == TLSACCESS_GENERIC) - { - DWORD cbSavedRegs = sizeof(ArgumentRegisters) + 2 * 4; // r0-r3, r4, lr - psl->ThumbEmitAdd(regScratch, thumbRegSp, cbSavedRegs); - psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfCallSiteSP - negSpace); - } - else - { - // str SP, [regFrame + FrameInfo.offsetOfCallSiteSP] - psl->ThumbEmitStoreRegIndirect(thumbRegSp, regFrame, FrameInfo.offsetOfCallSiteSP - negSpace); - } +#ifdef FEATURE_PAL + DWORD cbSavedRegs = sizeof(ArgumentRegisters) + 2 * 4; // r0-r3, r4, lr + psl->ThumbEmitAdd(regScratch, thumbRegSp, cbSavedRegs); + psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfCallSiteSP - negSpace); +#else + // str SP, [regFrame + FrameInfo.offsetOfCallSiteSP] + psl->ThumbEmitStoreRegIndirect(thumbRegSp, regFrame, FrameInfo.offsetOfCallSiteSP - negSpace); +#endif // mov [regThread + offsetof(Thread, m_pFrame)], regFrame psl->ThumbEmitStoreRegIndirect(regFrame, regThread, offsetof(Thread, m_pFrame)); // leave current Thread in R4 - if (mode == TLSACCESS_GENERIC) - { - psl->ThumbEmitEpilog(); - } - else - { - // Return. The return address has been restored into LR at this point. - // bx lr - psl->ThumbEmitJumpRegister(thumbRegLr); - } +#ifdef FEATURE_PAL + psl->ThumbEmitEpilog(); +#else + // Return. The return address has been restored into LR at this point. 
+ // bx lr + psl->ThumbEmitJumpRegister(thumbRegLr); +#endif // A single process-wide stub that will never unload RETURN psl->Link(SystemDomain::GetGlobalLoaderAllocator()->GetStubHeap()); } -void StubLinkerCPU::ThumbEmitGetThread(TLSACCESSMODE mode, ThumbReg dest) +void StubLinkerCPU::ThumbEmitGetThread(ThumbReg dest) { -#ifndef FEATURE_IMPLICIT_TLS - DWORD idxThread = GetThreadTLSIndex(); - - if (mode != TLSACCESS_GENERIC) - { - // mrc p15, 0, dest, c13, c0, 2 - Emit16(0xee1d); - Emit16((WORD)(0x0f50 | (dest << 12))); +#ifdef FEATURE_PAL - if (mode == TLSACCESS_WNT) - { - // ldr dest, [dest, #(WINNT_TLS_OFFSET + (idxThread * sizeof(void*)))] - ThumbEmitLoadRegIndirect(dest, dest, offsetof(TEB, TlsSlots) + (idxThread * sizeof(void*))); - } - else - { - _ASSERTE(mode == TLSACCESS_WNT_HIGH); - - // ldr dest, [dest, #WINNT5_TLSEXPANSIONPTR_OFFSET] - ThumbEmitLoadRegIndirect(dest, dest, offsetof(TEB, TlsExpansionSlots)); - - // ldr dest, [dest + #(idxThread * 4)] - ThumbEmitLoadRegIndirect(dest, dest, (idxThread - TLS_MINIMUM_AVAILABLE) * sizeof(void*)); - } - } - else - { - ThumbEmitMovConstant(ThumbReg(0), idxThread); - -#pragma push_macro("TlsGetValue") -#undef TlsGetValue - ThumbEmitMovConstant(ThumbReg(1), (TADDR)TlsGetValue); -#pragma pop_macro("TlsGetValue") - - ThumbEmitCallRegister(ThumbReg(1)); - - if (dest != ThumbReg(0)) - { - ThumbEmitMovRegReg(dest, ThumbReg(0)); - } - } -#else ThumbEmitMovConstant(ThumbReg(0), (TADDR)GetThread); ThumbEmitCallRegister(ThumbReg(0)); @@ -1532,7 +1477,20 @@ void StubLinkerCPU::ThumbEmitGetThread(TLSACCESSMODE mode, ThumbReg dest) { ThumbEmitMovRegReg(dest, ThumbReg(0)); } -#endif + +#else // FEATURE_PAL + + // mrc p15, 0, dest, c13, c0, 2 + Emit16(0xee1d); + Emit16((WORD)(0x0f50 | (dest << 12))); + + ThumbEmitLoadRegIndirect(dest, dest, offsetof(TEB, ThreadLocalStoragePointer)); + + ThumbEmitLoadRegIndirect(dest, dest, sizeof(void *) * (g_TlsIndex & 0xFFFF)); + + ThumbEmitLoadRegIndirect(dest, dest, (g_TlsIndex & 0x7FFF0000) >> 16); + +#endif // FEATURE_PAL } #endif // CROSSGEN_COMPILE @@ -2537,110 +2495,12 @@ void UMEntryThunkCode::Poison() #ifndef CROSSGEN_COMPILE - -EXTERN_C DWORD gThreadTLSIndex; -EXTERN_C DWORD gAppDomainTLSIndex; - - -EXTERN_C Object* JIT_TrialAllocSFastMP_InlineGetThread(CORINFO_CLASS_HANDLE typeHnd_); -EXTERN_C Object* JIT_BoxFastMP_InlineGetThread (CORINFO_CLASS_HANDLE type, void* unboxedData); -EXTERN_C Object* AllocateStringFastMP_InlineGetThread (CLR_I4 cch); -EXTERN_C Object* JIT_NewArr1OBJ_MP_InlineGetThread (CORINFO_CLASS_HANDLE arrayTypeHnd_, INT_PTR size); -EXTERN_C Object* JIT_NewArr1VC_MP_InlineGetThread (CORINFO_CLASS_HANDLE arrayTypeHnd_, INT_PTR size); - -EXTERN_C void JIT_TrialAllocSFastMP_InlineGetThread__PatchTLSOffset(); -EXTERN_C void JIT_BoxFastMP_InlineGetThread__PatchTLSOffset(); -EXTERN_C void AllocateStringFastMP_InlineGetThread__PatchTLSOffset(); -EXTERN_C void JIT_NewArr1VC_MP_InlineGetThread__PatchTLSOffset(); -EXTERN_C void JIT_NewArr1OBJ_MP_InlineGetThread__PatchTLSOffset(); - extern "C" void STDCALL JIT_PatchedCodeStart(); extern "C" void STDCALL JIT_PatchedCodeLast(); -#ifndef FEATURE_IMPLICIT_TLS -static const LPVOID InlineGetThreadLocations[] = { - (PVOID)JIT_TrialAllocSFastMP_InlineGetThread__PatchTLSOffset, - (PVOID)JIT_BoxFastMP_InlineGetThread__PatchTLSOffset, - (PVOID)AllocateStringFastMP_InlineGetThread__PatchTLSOffset, - (PVOID)JIT_NewArr1VC_MP_InlineGetThread__PatchTLSOffset, - (PVOID)JIT_NewArr1OBJ_MP_InlineGetThread__PatchTLSOffset, -}; -#endif - -//EXTERN_C Object* 
JIT_TrialAllocSFastMP(CORINFO_CLASS_HANDLE typeHnd_); -Object* JIT_TrialAllocSFastMP(CORINFO_CLASS_HANDLE typeHnd_); -EXTERN_C Object* JIT_NewArr1OBJ_MP(CORINFO_CLASS_HANDLE arrayMT, INT_PTR size); -EXTERN_C Object* AllocateStringFastMP(CLR_I4 cch); -EXTERN_C Object* JIT_NewArr1VC_MP(CORINFO_CLASS_HANDLE arrayMT, INT_PTR size); -EXTERN_C Object* JIT_BoxFastMP(CORINFO_CLASS_HANDLE type, void* unboxedData); - - -EXTERN_C void JIT_GetSharedNonGCStaticBase__PatchTLSLabel(); -EXTERN_C void JIT_GetSharedNonGCStaticBaseNoCtor__PatchTLSLabel(); -EXTERN_C void JIT_GetSharedGCStaticBase__PatchTLSLabel(); -EXTERN_C void JIT_GetSharedGCStaticBaseNoCtor__PatchTLSLabel(); - -EXTERN_C void JIT_GetSharedNonGCStaticBase_SingleAppDomain(); -EXTERN_C void JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain(); -EXTERN_C void JIT_GetSharedGCStaticBase_SingleAppDomain(); -EXTERN_C void JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain(); - - -static const LPVOID InlineGetAppDomainLocations[] = { - (PVOID)JIT_GetSharedNonGCStaticBase__PatchTLSLabel, - (PVOID)JIT_GetSharedNonGCStaticBaseNoCtor__PatchTLSLabel, - (PVOID)JIT_GetSharedGCStaticBase__PatchTLSLabel, - (PVOID)JIT_GetSharedGCStaticBaseNoCtor__PatchTLSLabel -}; - -#ifndef FEATURE_IMPLICIT_TLS -void FixupInlineGetters(DWORD tlsSlot, const LPVOID * pLocations, int nLocations) -{ - STANDARD_VM_CONTRACT; - - for (int i=0; iAppDomainLeaks()) @@ -2655,54 +2515,14 @@ void InitJITHelpers1() #endif // _DEBUG )) { - _ASSERTE(GCHeapUtilities::UseThreadAllocationContexts()); - // If the TLS for Thread is low enough use the super-fast helpers - if (gThreadTLSIndex < TLS_MINIMUM_AVAILABLE) - { - SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_TrialAllocSFastMP_InlineGetThread); - SetJitHelperFunction(CORINFO_HELP_BOX, JIT_BoxFastMP_InlineGetThread); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_InlineGetThread); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_InlineGetThread); - ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateStringFastMP_InlineGetThread), ECall::FastAllocateString); - } - else - { -/* - SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_TrialAllocSFastMP); - SetJitHelperFunction(CORINFO_HELP_BOX, JIT_BoxFastMP); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP); - SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP); + SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable); - ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateStringFastMP), ECall::FastAllocateString); -*/ - } + ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString); } - - - if(IsSingleAppDomain()) - { - SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE, JIT_GetSharedGCStaticBase_SingleAppDomain); - SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE, JIT_GetSharedNonGCStaticBase_SingleAppDomain); - SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR, JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain); - SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR,JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain); - } - else - if (gAppDomainTLSIndex >= TLS_MINIMUM_AVAILABLE) - { - SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE, JIT_GetSharedGCStaticBase_Portable); - 
SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE, JIT_GetSharedNonGCStaticBase_Portable); - SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR, JIT_GetSharedGCStaticBaseNoCtor_Portable); - SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR,JIT_GetSharedNonGCStaticBaseNoCtor_Portable); - } -#endif -} - -extern "C" Object *SetAppDomainInObject(Object *pObject) -{ - pObject->SetAppDomain(); - return pObject; } // +64 stack-based arguments here @@ -3041,19 +2861,13 @@ void StubLinkerCPU::EmitStubLinkFrame(TADDR pFrameVptr, int offsetOfFrame, int o // str r6, [r4 + #offsetof(MulticastFrame, m_Next)] // str r4, [r5 + #offsetof(Thread, m_pFrame)] -#ifdef FEATURE_IMPLICIT_TLS - TLSACCESSMODE mode = TLSACCESS_GENERIC; -#else - TLSACCESSMODE mode = GetTLSAccessMode(GetThreadTLSIndex()); + ThumbEmitGetThread(ThumbReg(5)); +#ifdef FEATURE_PAL + // reload argument registers that could have been corrupted by the call + for (int reg = 0; reg < 4; reg++) + ThumbEmitLoadRegIndirect(ThumbReg(reg), ThumbReg(4), + offsetOfTransitionBlock + TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, r[reg])); #endif - ThumbEmitGetThread(mode, ThumbReg(5)); - if (mode == TLSACCESS_GENERIC) - { - // reload argument registers that could have been corrupted by the call - for (int reg = 0; reg < 4; reg++) - ThumbEmitLoadRegIndirect(ThumbReg(reg), ThumbReg(4), - offsetOfTransitionBlock + TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, r[reg])); - } ThumbEmitLoadRegIndirect(ThumbReg(6), ThumbReg(5), Thread::GetOffsetOfCurrentFrame()); ThumbEmitStoreRegIndirect(ThumbReg(6), ThumbReg(4), Frame::GetOffsetOfNextLink()); diff --git a/src/vm/arm64/cgencpu.h b/src/vm/arm64/cgencpu.h index 8abe4de6ab57..a168cdc162cb 100644 --- a/src/vm/arm64/cgencpu.h +++ b/src/vm/arm64/cgencpu.h @@ -84,7 +84,11 @@ typedef INT64 StackElemType; // // Create alias for optimized implementations of helpers provided on this platform // -// optimized static helpers +#define JIT_GetSharedGCStaticBase JIT_GetSharedGCStaticBase_SingleAppDomain +#define JIT_GetSharedNonGCStaticBase JIT_GetSharedNonGCStaticBase_SingleAppDomain +#define JIT_GetSharedGCStaticBaseNoCtor JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain +#define JIT_GetSharedNonGCStaticBaseNoCtor JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain + #define JIT_Stelem_Ref JIT_Stelem_Ref //********************************************************************** @@ -435,10 +439,8 @@ class StubLinkerCPU : public StubLinker void EmitUnboxMethodStub(MethodDesc* pRealMD); void EmitCallManagedMethod(MethodDesc *pMD, BOOL fTailCall); void EmitCallLabel(CodeLabel *target, BOOL fTailCall, BOOL fIndirect); - void EmitSecureDelegateInvoke(UINT_PTR hash); - static UINT_PTR HashMulticastInvoke(MetaSig* pSig); + void EmitShuffleThunk(struct ShuffleEntry *pShuffleEntryArray); - void EmitGetThreadInlined(IntReg Xt); #ifdef _DEBUG void EmitNop() { Emit32(0xD503201F); } diff --git a/src/vm/arm64/stubs.cpp b/src/vm/arm64/stubs.cpp index c57fca94b10a..3c56c382eae7 100644 --- a/src/vm/arm64/stubs.cpp +++ b/src/vm/arm64/stubs.cpp @@ -16,10 +16,6 @@ #include "jitinterface.h" #include "ecall.h" -EXTERN_C void JIT_GetSharedNonGCStaticBase_SingleAppDomain(); -EXTERN_C void JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain(); -EXTERN_C void JIT_GetSharedGCStaticBase_SingleAppDomain(); -EXTERN_C void JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain(); EXTERN_C void JIT_UpdateWriteBarrierState(bool skipEphemeralCheck); @@ -1114,14 
+1110,6 @@ void InitJITHelpers1() } #endif - if(IsSingleAppDomain()) - { - SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE, JIT_GetSharedGCStaticBase_SingleAppDomain); - SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE, JIT_GetSharedNonGCStaticBase_SingleAppDomain); - SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR, JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain); - SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR,JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain); - } - JIT_UpdateWriteBarrierState(GCHeapUtilities::IsServerHeap()); } #ifndef FEATURE_PAL // TODO-ARM64-WINDOWS #13592 @@ -1846,33 +1834,6 @@ void StubLinkerCPU::EmitCallManagedMethod(MethodDesc *pMD, BOOL fTailCall) #ifndef CROSSGEN_COMPILE -EXTERN_C UINT32 _tls_index; -void StubLinkerCPU::EmitGetThreadInlined(IntReg Xt) -{ -#if defined(FEATURE_IMPLICIT_TLS) && !defined(FEATURE_PAL) - // Trashes x8. - IntReg X8 = IntReg(8); - _ASSERTE(Xt != X8); - - // Load the _tls_index - EmitLabelRef(NewExternalCodeLabel((LPVOID)&_tls_index), reinterpret_cast(gLoadFromLabelIF), X8); - - // Load Teb->ThreadLocalStoragePointer into x8 - EmitLoadStoreRegImm(eLOAD, Xt, IntReg(18), offsetof(_TEB, ThreadLocalStoragePointer)); - - // index it with _tls_index, i.e Teb->ThreadLocalStoragePointer[_tls_index]. - // This will give us the TLS section for the module on this thread's context - EmitLoadRegReg(Xt, Xt, X8, eLSL); - - // read the Thread* from TLS section - EmitAddImm(Xt, Xt, OFFSETOF__TLS__tls_CurrentThread); - EmitLoadStoreRegImm(eLOAD, Xt, Xt, 0); -#else - _ASSERTE(!"NYI:StubLinkerCPU::EmitGetThreadInlined"); -#endif - -} - void StubLinkerCPU::EmitUnboxMethodStub(MethodDesc *pMD) { _ASSERTE(!pMD->RequiresInstMethodDescArg()); diff --git a/src/vm/ceeload.cpp b/src/vm/ceeload.cpp index c64d9e9042d8..f5173196e365 100644 --- a/src/vm/ceeload.cpp +++ b/src/vm/ceeload.cpp @@ -10844,11 +10844,7 @@ void Module::LoadTokenTables() pEEInfo->emptyString = (CORINFO_Object **)StringObject::GetEmptyStringRefPtr(); } -#ifdef FEATURE_IMPLICIT_TLS pEEInfo->threadTlsIndex = TLS_OUT_OF_INDEXES; -#else - pEEInfo->threadTlsIndex = GetThreadTLSIndex(); -#endif pEEInfo->rvaStaticTlsIndex = NULL; #endif // CROSSGEN_COMPILE } diff --git a/src/vm/ceemain.cpp b/src/vm/ceemain.cpp index 44f5d8eb1d36..9dbe2b9dfff7 100644 --- a/src/vm/ceemain.cpp +++ b/src/vm/ceemain.cpp @@ -2714,17 +2714,7 @@ BOOL STDMETHODCALLTYPE EEDllMain( // TRUE on success, FALSE on error. , TRUE #endif ); -#ifdef FEATURE_IMPLICIT_TLS Thread* thread = GetThread(); -#else - // Don't use GetThread because perhaps we didn't initialize yet, or we - // have already shutdown the EE. Note that there is a race here. We - // might ask for TLS from a slot we just released. We are assuming that - // nobody re-allocates that same slot while we are doing this. It just - // isn't worth locking for such an obscure case. - DWORD tlsVal = GetThreadTLSIndex(); - Thread *thread = (tlsVal != (DWORD)-1)?(Thread *) UnsafeTlsGetValue(tlsVal):NULL; -#endif if (thread) { #ifdef FEATURE_COMINTEROP diff --git a/src/vm/ceemain.h b/src/vm/ceemain.h index ccf763ac80e3..9a14af54cad9 100644 --- a/src/vm/ceemain.h +++ b/src/vm/ceemain.h @@ -83,8 +83,6 @@ class CExecutionEngine : public IExecutionEngine, public IEEMemoryManager // Setup FLS simulation block, including ClrDebugState and StressLog. 
static void SetupTLSForThread(Thread *pThread); - static DWORD GetTlsIndex () {return TlsIndex;} - static LPVOID* GetTlsData(); static BOOL SetTlsData (void** ppTlsInfo); @@ -92,12 +90,6 @@ class CExecutionEngine : public IExecutionEngine, public IEEMemoryManager // private implementation: //*************************************************************************** private: - - // The debugger needs access to the TlsIndex so that we can read it from OOP. - friend class EEDbgInterfaceImpl; - - SVAL_DECL (DWORD, TlsIndex); - static PTLS_CALLBACK_FUNCTION Callbacks[MAX_PREDEFINED_TLS_SLOT]; //*************************************************************************** diff --git a/src/vm/corhost.cpp b/src/vm/corhost.cpp index f6ca34267a08..b1e5a8a0f36f 100644 --- a/src/vm/corhost.cpp +++ b/src/vm/corhost.cpp @@ -51,7 +51,6 @@ GVAL_IMPL_INIT(DWORD, g_fHostConfig, 0); -#ifdef FEATURE_IMPLICIT_TLS #ifndef __llvm__ EXTERN_C __declspec(thread) ThreadLocalInfo gCurrentThreadInfo; #else // !__llvm__ @@ -62,11 +61,6 @@ EXTERN_C UINT32 _tls_index; #else // FEATURE_PAL UINT32 _tls_index = 0; #endif // FEATURE_PAL -SVAL_IMPL_INIT(DWORD, CExecutionEngine, TlsIndex, _tls_index); -#else -SVAL_IMPL_INIT(DWORD, CExecutionEngine, TlsIndex, TLS_OUT_OF_INDEXES); -#endif - #if defined(FEATURE_WINDOWSPHONE) SVAL_IMPL_INIT(ECustomDumpFlavor, CCLRErrorReportingManager, g_ECustomDumpFlavor, DUMP_FLAVOR_Default); @@ -3168,7 +3162,6 @@ VOID WINAPI FlsCallback( #endif // HAS_FLS_SUPPORT -#ifdef FEATURE_IMPLICIT_TLS void** CExecutionEngine::GetTlsData() { LIMITED_METHOD_CONTRACT; @@ -3183,28 +3176,6 @@ BOOL CExecutionEngine::SetTlsData (void** ppTlsInfo) gCurrentThreadInfo.m_EETlsData = ppTlsInfo; return TRUE; } -#else -void** CExecutionEngine::GetTlsData() -{ - LIMITED_METHOD_CONTRACT; - - if (TlsIndex == TLS_OUT_OF_INDEXES) - return NULL; - - void **ppTlsData = (void **)UnsafeTlsGetValue(TlsIndex); - return ppTlsData; -} -BOOL CExecutionEngine::SetTlsData (void** ppTlsInfo) -{ - LIMITED_METHOD_CONTRACT; - - if (TlsIndex == TLS_OUT_OF_INDEXES) - return FALSE; - - return UnsafeTlsSetValue(TlsIndex, ppTlsInfo); -} - -#endif // FEATURE_IMPLICIT_TLS //--------------------------------------------------------------------------------------- // @@ -3295,25 +3266,6 @@ void **CExecutionEngine::CheckThreadState(DWORD slot, BOOL force) } #endif // HAS_FLS_SUPPORT -#ifndef FEATURE_IMPLICIT_TLS - // Ensure we have a TLS Index - if (TlsIndex == TLS_OUT_OF_INDEXES) - { - DWORD tryTlsIndex = UnsafeTlsAlloc(); - if (tryTlsIndex != TLS_OUT_OF_INDEXES) - { - if (FastInterlockCompareExchange((LONG*)&TlsIndex, tryTlsIndex, TLS_OUT_OF_INDEXES) != (LONG)TLS_OUT_OF_INDEXES) - { - UnsafeTlsFree(tryTlsIndex); - } - } - if (TlsIndex == TLS_OUT_OF_INDEXES) - { - COMPlusThrowOM(); - } - } -#endif // FEATURE_IMPLICIT_TLS - void** pTlsData = CExecutionEngine::GetTlsData(); BOOL fInTls = (pTlsData != NULL); diff --git a/src/vm/crossgencompile.cpp b/src/vm/crossgencompile.cpp index c4b9d3dfc3d0..411029becda8 100644 --- a/src/vm/crossgencompile.cpp +++ b/src/vm/crossgencompile.cpp @@ -72,27 +72,6 @@ BOOL Debug_IsLockedViaThreadSuspension() } #endif // _DEBUG -#if defined(FEATURE_MERGE_JIT_AND_ENGINE) && defined(FEATURE_IMPLICIT_TLS) -void* theJitTls; - -extern "C" -{ - -void* GetJitTls() -{ - LIMITED_METHOD_CONTRACT - - return theJitTls; -} -void SetJitTls(void* v) -{ - LIMITED_METHOD_CONTRACT - theJitTls = v; -} - -} -#endif - //--------------------------------------------------------------------------------------- // // All locks are nops because of 
there is always only one thread. diff --git a/src/vm/eedbginterface.h b/src/vm/eedbginterface.h index 8c8c44d3e2d6..241ef332e2e3 100644 --- a/src/vm/eedbginterface.h +++ b/src/vm/eedbginterface.h @@ -280,7 +280,6 @@ class EEDebugInterface virtual void GetRuntimeOffsets(SIZE_T *pTLSIndex, SIZE_T *pTLSIsSpecialIndex, SIZE_T *pTLSCantStopIndex, - SIZE_T *pTLSIndexOfPredefs, SIZE_T *pEEThreadStateOffset, SIZE_T *pEEThreadStateNCOffset, SIZE_T *pEEThreadPGCDisabledOffset, diff --git a/src/vm/eedbginterfaceimpl.cpp b/src/vm/eedbginterfaceimpl.cpp index ede82c778003..e9f59d25d286 100644 --- a/src/vm/eedbginterfaceimpl.cpp +++ b/src/vm/eedbginterfaceimpl.cpp @@ -1387,14 +1387,11 @@ void EEDbgInterfaceImpl::DisableTraceCall(Thread *thread) thread->DecrementTraceCallCount(); } -#ifdef FEATURE_IMPLICIT_TLS EXTERN_C UINT32 _tls_index; -#endif void EEDbgInterfaceImpl::GetRuntimeOffsets(SIZE_T *pTLSIndex, SIZE_T *pTLSIsSpecialIndex, SIZE_T *pTLSCantStopIndex, - SIZE_T* pTLSIndexOfPredefs, SIZE_T *pEEThreadStateOffset, SIZE_T *pEEThreadStateNCOffset, SIZE_T *pEEThreadPGCDisabledOffset, @@ -1417,7 +1414,6 @@ void EEDbgInterfaceImpl::GetRuntimeOffsets(SIZE_T *pTLSIndex, PRECONDITION(CheckPointer(pTLSIndex)); PRECONDITION(CheckPointer(pTLSIsSpecialIndex)); PRECONDITION(CheckPointer(pEEThreadStateOffset)); - PRECONDITION(CheckPointer(pTLSIndexOfPredefs)); PRECONDITION(CheckPointer(pEEThreadStateNCOffset)); PRECONDITION(CheckPointer(pEEThreadPGCDisabledOffset)); PRECONDITION(CheckPointer(pEEThreadPGCDisabledValue)); @@ -1433,14 +1429,9 @@ void EEDbgInterfaceImpl::GetRuntimeOffsets(SIZE_T *pTLSIndex, } CONTRACTL_END; -#ifdef FEATURE_IMPLICIT_TLS - *pTLSIndex = _tls_index; -#else - *pTLSIndex = GetThreadTLSIndex(); -#endif + *pTLSIndex = g_TlsIndex; *pTLSIsSpecialIndex = TlsIdx_ThreadType; *pTLSCantStopIndex = TlsIdx_CantStopCount; - *pTLSIndexOfPredefs = CExecutionEngine::TlsIndex; *pEEThreadStateOffset = Thread::GetOffsetOfState(); *pEEThreadStateNCOffset = Thread::GetOffsetOfStateNC(); *pEEThreadPGCDisabledOffset = Thread::GetOffsetOfGCFlag(); diff --git a/src/vm/eedbginterfaceimpl.h b/src/vm/eedbginterfaceimpl.h index 979c706fb2fb..7451246a21c2 100644 --- a/src/vm/eedbginterfaceimpl.h +++ b/src/vm/eedbginterfaceimpl.h @@ -272,7 +272,6 @@ class EEDbgInterfaceImpl : public EEDebugInterface void GetRuntimeOffsets(SIZE_T *pTLSIndex, SIZE_T *pTLSIsSpecialIndex, SIZE_T *pTLSCantStopIndex, - SIZE_T *pTLSIndexOfPredefs, SIZE_T *pEEThreadStateOffset, SIZE_T *pEEThreadStateNCOffset, SIZE_T *pEEThreadPGCDisabledOffset, diff --git a/src/vm/i386/asmconstants.h b/src/vm/i386/asmconstants.h index f7d5f709dcaa..aa420428f7a3 100644 --- a/src/vm/i386/asmconstants.h +++ b/src/vm/i386/asmconstants.h @@ -317,9 +317,6 @@ ASMCONSTANTS_C_ASSERT(ASM__VTABLE_SLOTS_PER_CHUNK == VTABLE_SLOTS_PER_CHUNK) #define ASM__VTABLE_SLOTS_PER_CHUNK_LOG2 3 ASMCONSTANTS_C_ASSERT(ASM__VTABLE_SLOTS_PER_CHUNK_LOG2 == VTABLE_SLOTS_PER_CHUNK_LOG2) -#define TLS_GETTER_MAX_SIZE_ASM DBG_FRE(0x20, 0x10) -ASMCONSTANTS_C_ASSERT(TLS_GETTER_MAX_SIZE_ASM == TLS_GETTER_MAX_SIZE) - #define JIT_TailCall_StackOffsetToFlags 0x08 #define CallDescrData__pSrc 0x00 diff --git a/src/vm/i386/asmhelpers.asm b/src/vm/i386/asmhelpers.asm index 9df33219ac97..17d521fb92a4 100644 --- a/src/vm/i386/asmhelpers.asm +++ b/src/vm/i386/asmhelpers.asm @@ -47,23 +47,10 @@ endif ; FEATURE_COMINTEROP EXTERN __alloca_probe:PROC EXTERN _NDirectImportWorker@4:PROC EXTERN _UMThunkStubRareDisableWorker@8:PROC -ifndef FEATURE_IMPLICIT_TLS -ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK -; This is 
defined in C (threads.cpp) and enforces EE_THREAD_NOT_REQUIRED contracts -GetThreadGenericFullCheck EQU ?GetThreadGenericFullCheck@@YGPAVThread@@XZ -EXTERN GetThreadGenericFullCheck:PROC -endif ; ENABLE_GET_THREAD_GENERIC_FULL_CHECK - -EXTERN _gThreadTLSIndex:DWORD -EXTERN _gAppDomainTLSIndex:DWORD -endif ; FEATURE_IMPLICIT_TLS EXTERN _VarargPInvokeStubWorker@12:PROC EXTERN _GenericPInvokeCalliStubWorker@12:PROC -EXTERN _GetThread@0:PROC -EXTERN _GetAppDomain@0:PROC - ifdef MDA_SUPPORTED EXTERN _PInvokeStackImbalanceWorker@8:PROC endif @@ -716,56 +703,6 @@ doRet: FASTCALL_ENDFUNC HelperMethodFrameRestoreState -ifndef FEATURE_IMPLICIT_TLS -;--------------------------------------------------------------------------- -; Portable GetThread() function: used if no platform-specific optimizations apply. -; This is in assembly code because we count on edx not getting trashed on calls -; to this function. -;--------------------------------------------------------------------------- -; Thread* __stdcall GetThreadGeneric(void); -GetThreadGeneric PROC stdcall public USES ecx edx - -ifdef _DEBUG - cmp dword ptr [_gThreadTLSIndex], -1 - jnz @F - int 3 -@@: -endif -ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK - ; non-PAL, debug-only GetThreadGeneric should defer to GetThreadGenericFullCheck - ; to do extra contract enforcement. (See GetThreadGenericFullCheck for details.) - ; This code is intentionally not added to asmhelper.s, as this enforcement is only - ; implemented for non-PAL builds. - call GetThreadGenericFullCheck -else - push dword ptr [_gThreadTLSIndex] - call dword ptr [__imp__TlsGetValue@4] -endif - ret -GetThreadGeneric ENDP - -;--------------------------------------------------------------------------- -; Portable GetAppdomain() function: used if no platform-specific optimizations apply. -; This is in assembly code because we count on edx not getting trashed on calls -; to this function. -;--------------------------------------------------------------------------- -; Appdomain* __stdcall GetAppDomainGeneric(void); -GetAppDomainGeneric PROC stdcall public USES ecx edx - -ifdef _DEBUG - cmp dword ptr [_gAppDomainTLSIndex], -1 - jnz @F - int 3 -@@: -endif - - push dword ptr [_gAppDomainTLSIndex] - call dword ptr [__imp__TlsGetValue@4] - ret -GetAppDomainGeneric ENDP -endif - - ifdef FEATURE_HIJACK ; A JITted method's return address was hijacked to return to us here. diff --git a/src/vm/i386/cgencpu.h b/src/vm/i386/cgencpu.h index 5360b3eb0eb4..ffdfb82b1422 100644 --- a/src/vm/i386/cgencpu.h +++ b/src/vm/i386/cgencpu.h @@ -108,14 +108,6 @@ BOOL Runtime_Test_For_SSE2(); #define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 4 #define CALLDESCR_ARGREGS 1 // CallDescrWorker has ArgumentRegister parameter -// Max size of patched TLS helpers -#ifdef _DEBUG -// Debug build needs extra space for last error trashing -#define TLS_GETTER_MAX_SIZE 0x20 -#else -#define TLS_GETTER_MAX_SIZE 0x10 -#endif - //======================================================================= // IMPORTANT: This value is used to figure out how much to allocate // for a fixed array of FieldMarshaler's. That means it must be at least @@ -558,6 +550,12 @@ inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode) return TRUE; } +// +// JIT HELPER ALIASING FOR PORTABILITY. 
+// +// Create alias for optimized implementations of helpers provided on this platform +// + // optimized static helpers generated dynamically at runtime // #define JIT_GetSharedGCStaticBase // #define JIT_GetSharedNonGCStaticBase @@ -573,4 +571,5 @@ inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode) #define JIT_NewCrossContext JIT_NewCrossContext #define JIT_Stelem_Ref JIT_Stelem_Ref #endif // FEATURE_PAL + #endif // __cgenx86_h__ diff --git a/src/vm/i386/cgenx86.cpp b/src/vm/i386/cgenx86.cpp index 3b523c9c2849..c91a716137df 100644 --- a/src/vm/i386/cgenx86.cpp +++ b/src/vm/i386/cgenx86.cpp @@ -1076,7 +1076,7 @@ Stub *GenerateInitPInvokeFrameHelper() unsigned negSpace = FrameInfo.offsetOfFrameVptr; // mov esi, GetThread() - psl->X86EmitCurrentThreadFetch(kESI, (1<X86EmitCurrentThreadFetch(kESI); // mov [edi + FrameInfo.offsetOfGSCookie], GetProcessGSCookie() psl->X86EmitOffsetModRM(0xc7, (X86Reg)0x0, kEDI, FrameInfo.offsetOfGSCookie - negSpace); diff --git a/src/vm/i386/jithelp.asm b/src/vm/i386/jithelp.asm index 85e824040a94..5d64b18c9424 100644 --- a/src/vm/i386/jithelp.asm +++ b/src/vm/i386/jithelp.asm @@ -2,11 +2,6 @@ ; The .NET Foundation licenses this file to you under the MIT license. ; See the LICENSE file in the project root for more information. -; ==++== -; - -; -; ==--== ; *********************************************************************** ; File: JIThelp.asm ; @@ -70,13 +65,11 @@ endif EXTERN _g_TailCallFrameVptr:DWORD EXTERN @JIT_FailFast@0:PROC EXTERN _s_gsCookie:DWORD +EXTERN _GetThread@0:PROC EXTERN @JITutil_IsInstanceOfInterface@8:PROC EXTERN @JITutil_ChkCastInterface@8:PROC EXTERN @JITutil_IsInstanceOfAny@8:PROC EXTERN @JITutil_ChkCastAny@8:PROC -ifdef FEATURE_IMPLICIT_TLS -EXTERN _GetThread@0:PROC -endif ifdef WRITE_BARRIER_CHECK ; Those global variables are always defined, but should be 0 for Server GC @@ -963,13 +956,15 @@ NewArgs equ 20 ; extra space is incremented as we push things on the stack along the way ExtraSpace = 0 - call _GetThread@0; eax = Thread* - push eax ; Thread* + push 0 ; Thread* ; save ArgumentRegisters push ecx push edx + call _GetThread@0; eax = Thread* + mov [esp + 8], eax + ExtraSpace = 12 ; pThread, ecx, edx ifdef FEATURE_HIJACK @@ -1247,44 +1242,8 @@ _JIT_PatchedCodeStart@0 proc public ret _JIT_PatchedCodeStart@0 endp -; -; Optimized TLS getters -; - - ALIGN 4 - -ifndef FEATURE_IMPLICIT_TLS -_GetThread@0 proc public - ; This will be overwritten at runtime with optimized GetThread implementation - jmp short _GetTLSDummy@0 - ; Just allocate space that will be filled in at runtime - db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh) -_GetThread@0 endp - - ALIGN 4 - -_GetAppDomain@0 proc public - ; This will be overwritten at runtime with optimized GetAppDomain implementation - jmp short _GetTLSDummy@0 - ; Just allocate space that will be filled in at runtime - db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh) -_GetAppDomain@0 endp - -_GetTLSDummy@0 proc public - xor eax,eax - ret -_GetTLSDummy@0 endp - ALIGN 4 -_ClrFlsGetBlock@0 proc public - ; This will be overwritten at runtime with optimized ClrFlsGetBlock implementation - jmp short _GetTLSDummy@0 - ; Just allocate space that will be filled in at runtime - db (TLS_GETTER_MAX_SIZE_ASM - 2) DUP (0CCh) -_ClrFlsGetBlock@0 endp -endif - ;********************************************************************** ; Write barriers generated at runtime diff --git a/src/vm/i386/jitinterfacex86.cpp b/src/vm/i386/jitinterfacex86.cpp index 8463b084ac76..1b470c95333f 100644 --- 
a/src/vm/i386/jitinterfacex86.cpp
+++ b/src/vm/i386/jitinterfacex86.cpp
@@ -539,7 +539,7 @@ void JIT_TrialAlloc::EmitCore(CPUSTUBLINKER *psl, CodeLabel *noLock, CodeLabel *
            && "EAX should contain size for allocation and it doesnt!!!");
 
     // Fetch current thread into EDX, preserving EAX and ECX
-    psl->X86EmitCurrentThreadFetch(kEDX, (1<<kEAX)|(1<<kECX));
+    psl->X86EmitCurrentThreadFetch(kEDX);
 
     // Try the allocation.
 
@@ -1253,7 +1253,7 @@ FastPrimitiveArrayAllocatorFuncPtr fastPrimitiveArrayAllocator = UnframedAllocat
 
 // "init" should be the address of a routine which takes an argument of
 // the module domain ID, the class domain ID, and returns the static base pointer
-void EmitFastGetSharedStaticBase(CPUSTUBLINKER *psl, CodeLabel *init, bool bCCtorCheck, bool bGCStatic, bool bSingleAppDomain)
+void EmitFastGetSharedStaticBase(CPUSTUBLINKER *psl, CodeLabel *init, bool bCCtorCheck, bool bGCStatic)
 {
     STANDARD_VM_CONTRACT;
 
@@ -1267,35 +1267,6 @@ void EmitFastGetSharedStaticBase(CPUSTUBLINKER *psl, CodeLabel *init, bool bCCto
         psl->Emit8(0x89);
         psl->Emit8(0xc8);
 
-        if(!bSingleAppDomain)
-        {
-            // Check tag
-            CodeLabel *cctorCheck = psl->NewCodeLabel();
-
-
-            // test eax, 1
-            psl->Emit8(0xa9);
-            psl->Emit32(1);
-
-            // jz cctorCheck
-            psl->X86EmitCondJump(cctorCheck, X86CondCode::kJZ);
-
-            // mov eax GetAppDomain()
-            psl->X86EmitCurrentAppDomainFetch(kEAX, (1<<kECX));
-
-            // mov eax [eax->m_sDomainLocalBlock.m_pModuleSlots]
-            psl->X86EmitIndexRegLoad(kEAX, kEAX, (__int32) AppDomain::GetOffsetOfModuleSlotsPointer());
-
-            // Note: weird address arithmetic effectively does:
-            // shift over 1 to remove tag bit (which is always 1), then multiply by 4.
-            // mov eax [eax + ecx*2 - 2]
-            psl->X86EmitOp(0x8b, kEAX, kEAX, -2, kECX, 2);
-
-            // cctorCheck:
-            psl->EmitLabel(cctorCheck);
-
-        }
-
     if (bCCtorCheck)
     {
         // test [eax + edx + offsetof(DomainLocalModule, m_pDataBlob], ClassInitFlags::INITIALIZED_FLAG // Is class inited
@@ -1356,7 +1327,7 @@ void EmitFastGetSharedStaticBase(CPUSTUBLINKER *psl, CodeLabel *init, bool bCCto
 }
 
-void *GenFastGetSharedStaticBase(bool bCheckCCtor, bool bGCStatic, bool bSingleAppDomain)
+void *GenFastGetSharedStaticBase(bool bCheckCCtor, bool bGCStatic)
 {
     STANDARD_VM_CONTRACT;
 
@@ -1372,7 +1343,7 @@ void *GenFastGetSharedStaticBase(bool bCheckCCtor, bool bGCStatic, bool bSingleA
         init = sl.NewExternalCodeLabel((LPVOID)JIT_GetSharedNonGCStaticBase);
     }
 
-    EmitFastGetSharedStaticBase(&sl, init, bCheckCCtor, bGCStatic, bSingleAppDomain);
+    EmitFastGetSharedStaticBase(&sl, init, bCheckCCtor, bGCStatic);
 
     Stub *pStub = sl.Link(SystemDomain::GetGlobalLoaderAllocator()->GetExecutableHeap());
 
@@ -1521,16 +1492,14 @@ void InitJITHelpers1()
             //UnframedAllocateString;
     }
 
-    bool bSingleAppDomain = IsSingleAppDomain();
-
     // Replace static helpers with faster assembly versions
-    pMethodAddresses[6] = GenFastGetSharedStaticBase(true, true, bSingleAppDomain);
+    pMethodAddresses[6] = GenFastGetSharedStaticBase(true, true);
     SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE, pMethodAddresses[6]);
-    pMethodAddresses[7] = GenFastGetSharedStaticBase(true, false, bSingleAppDomain);
+    pMethodAddresses[7] = GenFastGetSharedStaticBase(true, false);
     SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE, pMethodAddresses[7]);
-    pMethodAddresses[8] = GenFastGetSharedStaticBase(false, true, bSingleAppDomain);
+    pMethodAddresses[8] = GenFastGetSharedStaticBase(false, true);
     SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR, pMethodAddresses[8]);
-    pMethodAddresses[9] = GenFastGetSharedStaticBase(false, false, bSingleAppDomain);
+
pMethodAddresses[9] = GenFastGetSharedStaticBase(false, false); SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR, pMethodAddresses[9]); ETW::MethodLog::StubsInitialized(pMethodAddresses, (PVOID *)pHelperNames, ETW_NUM_JIT_HELPERS); diff --git a/src/vm/i386/stublinkerx86.cpp b/src/vm/i386/stublinkerx86.cpp index b77609822b11..3b376be26def 100644 --- a/src/vm/i386/stublinkerx86.cpp +++ b/src/vm/i386/stublinkerx86.cpp @@ -2320,7 +2320,7 @@ static const X86Reg c_argRegs[] = { #ifndef CROSSGEN_COMPILE -#if defined(_DEBUG) && (defined(_TARGET_AMD64_) || defined(_TARGET_X86_)) && !defined(FEATURE_PAL) +#if defined(_DEBUG) && !defined(FEATURE_PAL) void StubLinkerCPU::EmitJITHelperLoggingThunk(PCODE pJitHelper, LPVOID helperFuncCount) { STANDARD_VM_CONTRACT; @@ -2358,181 +2358,36 @@ void StubLinkerCPU::EmitJITHelperLoggingThunk(PCODE pJitHelper, LPVOID helperFun #endif X86EmitTailcallWithSinglePop(NewExternalCodeLabel(pJitHelper), kECX); } -#endif // _DEBUG && (_TARGET_AMD64_ || _TARGET_X86_) && !FEATURE_PAL +#endif // _DEBUG && !FEATURE_PAL -#ifndef FEATURE_IMPLICIT_TLS -//--------------------------------------------------------------- -// Emit code to store the current Thread structure in dstreg -// preservedRegSet is a set of registers to be preserved -// TRASHES EAX, EDX, ECX unless they are in preservedRegSet. -// RESULTS dstreg = current Thread -//--------------------------------------------------------------- -VOID StubLinkerCPU::X86EmitTLSFetch(DWORD idx, X86Reg dstreg, unsigned preservedRegSet) +#if !defined(FEATURE_STUBS_AS_IL) +VOID StubLinkerCPU::X86EmitCurrentThreadFetch(X86Reg dstreg) { CONTRACTL { STANDARD_VM_CHECK; - // It doesn't make sense to have the destination register be preserved - PRECONDITION((preservedRegSet & (1<> 16); } +#endif // !FEATURE_STUBS_AS_IL #if defined(_TARGET_X86_) @@ -2861,56 +2716,7 @@ VOID StubLinkerCPU::EmitSetup(CodeLabel *pForwardRef) { STANDARD_VM_CONTRACT; -#ifdef FEATURE_IMPLICIT_TLS - DWORD idx = 0; - TLSACCESSMODE mode = TLSACCESS_GENERIC; -#else - DWORD idx = GetThreadTLSIndex(); - TLSACCESSMODE mode = GetTLSAccessMode(idx); -#endif - -#ifdef _DEBUG - { - static BOOL f = TRUE; - f = !f; - if (f) - { - mode = TLSACCESS_GENERIC; - } - } -#endif - - switch (mode) - { - case TLSACCESS_WNT: -#ifndef FEATURE_PAL - { - unsigned __int32 tlsofs = offsetof(TEB, TlsSlots) + (idx * sizeof(void*)); - - static const BYTE code[] = {0x64,0x8b,0x1d}; // mov ebx, dword ptr fs:[IMM32] - EmitBytes(code, sizeof(code)); - Emit32(tlsofs); - } -#else // !FEATURE_PAL - _ASSERTE("TLSACCESS_WNT mode is not supported"); -#endif // !FEATURE_PAL - break; - - case TLSACCESS_GENERIC: -#ifdef FEATURE_IMPLICIT_TLS - X86EmitCall(NewExternalCodeLabel((LPVOID) GetThread), sizeof(void*)); -#else - X86EmitPushImm32(idx); - - // call TLSGetValue - X86EmitCall(NewExternalCodeLabel((LPVOID) TlsGetValue), sizeof(void*)); -#endif - // mov ebx,eax - Emit16(0xc389); - break; - default: - _ASSERTE(0); - } + X86EmitCurrentThreadFetch(kEBX); // cmp ebx, 0 static const BYTE b[] = { 0x83, 0xFB, 0x0}; @@ -3150,8 +2956,7 @@ VOID StubLinkerCPU::EmitMethodStubProlog(TADDR pFrameVptr, int transitionBlockOf #endif // _TARGET_X86_ // ebx <-- GetThread() - // Trashes X86TLSFetch_TRASHABLE_REGS - X86EmitCurrentThreadFetch(kEBX, 0); + X86EmitCurrentThreadFetch(kEBX); #if _DEBUG diff --git a/src/vm/i386/stublinkerx86.h b/src/vm/i386/stublinkerx86.h index 76d1f95845ff..d523bb0461ed 100644 --- a/src/vm/i386/stublinkerx86.h +++ b/src/vm/i386/stublinkerx86.h @@ -219,11 +219,7 @@ class 
StubLinkerCPU : public StubLinker VOID X86EmitLeaRIP(CodeLabel *target, X86Reg reg); #endif - static const unsigned X86TLSFetch_TRASHABLE_REGS = (1<= TLS_MINIMUM_AVAILABLE) - { - SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE, JIT_GetSharedGCStaticBase_Slow); - SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE, JIT_GetSharedNonGCStaticBase_Slow); - SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR, JIT_GetSharedGCStaticBaseNoCtor_Slow); - SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR,JIT_GetSharedNonGCStaticBaseNoCtor_Slow); - } -#endif // !FEATURE_IMPLICIT_TLS #endif // _TARGET_AMD64_ } diff --git a/src/vm/threads.cpp b/src/vm/threads.cpp index 56bdffb1f1ce..941d3645beaa 100644 --- a/src/vm/threads.cpp +++ b/src/vm/threads.cpp @@ -71,10 +71,6 @@ static CrstStatic s_initializeYieldProcessorNormalizedCrst; BOOL Thread::s_fCleanFinalizedThread = FALSE; -#ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK -BOOL Thread::s_fEnforceEEThreadNotRequiredContracts = FALSE; -#endif - Volatile Thread::s_threadPoolCompletionCountOverflow = 0; CrstStatic g_DeadlockAwareCrst; @@ -290,9 +286,6 @@ bool Thread::DetectHandleILStubsForDebugger() return false; } - -#ifdef FEATURE_IMPLICIT_TLS - extern "C" { #ifndef __llvm__ __declspec(thread) @@ -304,26 +297,15 @@ ThreadLocalInfo gCurrentThreadInfo = NULL, // m_pThread NULL, // m_pAppDomain NULL, // m_EETlsData -#if defined(FEATURE_MERGE_JIT_AND_ENGINE) - NULL, // m_pCompiler -#endif }; } // extern "C" // index into TLS Array. Definition added by compiler EXTERN_C UINT32 _tls_index; -#else // FEATURE_IMPLICIT_TLS -extern "C" { -GVAL_IMPL_INIT(DWORD, gThreadTLSIndex, TLS_OUT_OF_INDEXES); // index ( (-1) == uninitialized ) -GVAL_IMPL_INIT(DWORD, gAppDomainTLSIndex, TLS_OUT_OF_INDEXES); // index ( (-1) == uninitialized ) -} -#endif // FEATURE_IMPLICIT_TLS - #ifndef DACCESS_COMPILE -#ifdef FEATURE_IMPLICIT_TLS BOOL SetThread(Thread* t) { - LIMITED_METHOD_CONTRACT + LIMITED_METHOD_CONTRACT gCurrentThreadInfo.m_pThread = t; return TRUE; @@ -331,52 +313,12 @@ BOOL SetThread(Thread* t) BOOL SetAppDomain(AppDomain* ad) { - LIMITED_METHOD_CONTRACT + LIMITED_METHOD_CONTRACT gCurrentThreadInfo.m_pAppDomain = ad; return TRUE; } -#if defined(FEATURE_MERGE_JIT_AND_ENGINE) -extern "C" -{ - -void* GetJitTls() -{ - LIMITED_METHOD_CONTRACT - - return gCurrentThreadInfo.m_pJitTls; -} - -void SetJitTls(void* v) -{ - LIMITED_METHOD_CONTRACT - gCurrentThreadInfo.m_pJitTls = v; -} - -} -#endif // defined(FEATURE_MERGE_JIT_AND_ENGINE) - -#define ThreadInited() (TRUE) - -#else // FEATURE_IMPLICIT_TLS -BOOL SetThread(Thread* t) -{ - WRAPPER_NO_CONTRACT - return UnsafeTlsSetValue(GetThreadTLSIndex(), t); -} - -BOOL SetAppDomain(AppDomain* ad) -{ - WRAPPER_NO_CONTRACT - return UnsafeTlsSetValue(GetAppDomainTLSIndex(), ad); -} - -#define ThreadInited() (gThreadTLSIndex != TLS_OUT_OF_INDEXES) - -#endif // FEATURE_IMPLICIT_TLS - - BOOL Thread::Alert () { CONTRACTL { @@ -681,7 +623,6 @@ Thread* SetupThread(BOOL fInternal) } CONTRACTL_END; - _ASSERTE(ThreadInited()); Thread* pThread; if ((pThread = GetThread()) != NULL) return pThread; @@ -782,18 +723,6 @@ Thread* SetupThread(BOOL fInternal) !pThread->PrepareApartmentAndContext()) ThrowOutOfMemory(); -#ifndef FEATURE_IMPLICIT_TLS - // make sure we will not fail when we store in TLS in the future. 
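The removed SetupThread block that continues below pre-seeded the explicitly allocated TLS slots so that later stores could not fail; with that path gone, only the compiler-managed pattern remains. A minimal standalone sketch of the two access styles involved in this change, assuming nothing beyond the Win32 TLS API; DemoThreadInfo and g_demoTlsSlot are illustrative names, not runtime identifiers.

#include <windows.h>

struct DemoThreadInfo { void* pThread; void* pAppDomain; };

// Explicit TLS, the pattern being deleted: a slot index is allocated once and
// every access goes through the Win32 TLS API.
static DWORD g_demoTlsSlot = TlsAlloc();
static BOOL  SetThreadExplicit(void* t) { return TlsSetValue(g_demoTlsSlot, t); }
static void* GetThreadExplicit()        { return TlsGetValue(g_demoTlsSlot); }

// Implicit TLS, the pattern the runtime keeps: the compiler and OS loader carve
// out per-thread storage for the variable, so access is a direct read or write.
__declspec(thread) static DemoThreadInfo g_demoInfo;
static void  SetThreadImplicit(void* t) { g_demoInfo.pThread = t; }
static void* GetThreadImplicit()        { return g_demoInfo.pThread; }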
- if (!UnsafeTlsSetValue(gThreadTLSIndex, NULL)) - { - ThrowOutOfMemory(); - } - if (!UnsafeTlsSetValue(GetAppDomainTLSIndex(), NULL)) - { - ThrowOutOfMemory(); - } -#endif - // reset any unstarted bits on the thread object FastInterlockAnd((ULONG *) &pThread->m_State, ~Thread::TS_Unstarted); FastInterlockOr((ULONG *) &pThread->m_State, Thread::TS_LegalToJoin); @@ -915,7 +844,6 @@ Thread* SetupUnstartedThread(BOOL bRequiresTSL) } CONTRACTL_END; - _ASSERTE(ThreadInited()); Thread* pThread = new Thread(); FastInterlockOr((ULONG *) &pThread->m_State, @@ -1112,42 +1040,11 @@ HRESULT Thread::DetachThread(BOOL fDLLThreadDetach) return S_OK; } -#ifndef FEATURE_IMPLICIT_TLS -//--------------------------------------------------------------------------- -// Returns the TLS index for the Thread. This is strictly for the use of -// our ASM stub generators that generate inline code to access the Thread. -// Normally, you should use GetThread(). -//--------------------------------------------------------------------------- -DWORD GetThreadTLSIndex() -{ - LIMITED_METHOD_CONTRACT; - - return gThreadTLSIndex; -} - -//--------------------------------------------------------------------------- -// Returns the TLS index for the AppDomain. This is strictly for the use of -// our ASM stub generators that generate inline code to access the AppDomain. -// Normally, you should use GetAppDomain(). -//--------------------------------------------------------------------------- -DWORD GetAppDomainTLSIndex() -{ - LIMITED_METHOD_CONTRACT; - - return gAppDomainTLSIndex; -} -#endif - DWORD GetRuntimeId() { LIMITED_METHOD_CONTRACT; -#ifndef FEATURE_IMPLICIT_TLS - _ASSERTE(GetThreadTLSIndex() != TLS_OUT_OF_INDEXES); - return GetThreadTLSIndex() + 3; -#else return _tls_index; -#endif } //--------------------------------------------------------------------------- @@ -1191,165 +1088,6 @@ Thread* WINAPI CreateThreadBlockThrow() DWORD_PTR Thread::OBJREF_HASH = OBJREF_TABSIZE; #endif -#ifndef FEATURE_IMPLICIT_TLS - -#ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK - -// ---------------------------------------------------------------------------- -// GetThreadGenericFullCheck -// -// Description: -// The non-PAL, x86 / x64 assembly versions of GetThreadGeneric call into this C -// function to optionally do some verification before returning the EE Thread object -// for the current thread. Currently the primary enforcement this function does is -// around the EE_THREAD_(NOT)_REQUIRED contracts. For a definition of these -// contracts, how they're used, and how temporary "safe" scopes may be created -// using BEGIN_GETTHREAD_ALLOWED / END_GETTHREAD_ALLOWED, see the comments at the top -// of contract.h. -// -// The EE_THREAD_(NOT)_REQUIRED contracts are enforced as follows: -// * code:EEContract::DoChecks enforces the following: -// * On entry to an EE_THREAD_REQUIRED function, GetThread() != NULL -// * An EE_THREAD_REQUIRED function may not be called from an -// EE_THREAD_NOT_REQUIRED function, unless there is an intervening -// BEGIN/END_GETTHREAD_ALLOWED scope -// * This function (GetThreadGenericFullCheck) enforces that an -// EE_THREAD_NOT_REQUIRED function may not call GetThread(), unless there is -// an intervening BEGIN/END_GETTHREAD_ALLOWED scope. 
While this enforcement -// is straightforward below, the tricky part is getting -// GetThreadGenericFullCheck() to actually be called when GetThread() is -// called, given the optimizations around GetThread(): -// * code:InitThreadManager ensures that non-PAL, debug, x86/x64 builds that -// run with COMPlus_EnforceEEThreadNotRequiredContracts set are forced to -// use GetThreadGeneric instead of the dynamically generated optimized -// TLS getter. -// * The non-PAL, debug, x86/x64 GetThreadGeneric() (implemented in the -// processor-specific assembly files) knows to call -// GetThreadGenericFullCheck() to do the enforcement. -// -Thread * GetThreadGenericFullCheck() -{ - // Can not have a dynamic contract here. Contract depends on GetThreadGeneric. - // Contract here causes stack overflow. - STATIC_CONTRACT_NOTHROW; - STATIC_CONTRACT_GC_NOTRIGGER; - - if (!ThreadInited()) - { - // #GTInfiniteRecursion - // - // Normally, we'd want to assert here, but that could lead to infinite recursion. - // Bringing up the assert dialog requires a string lookup, which requires getting - // the Thread's UI culture ID, which, or course, requires getting the Thread. So - // we'll just break instead. - DebugBreak(); - } - - if (g_fEEStarted && - - // Using ShouldEnforceEEThreadNotRequiredContracts() instead - // of directly checking CLRConfig::GetConfigValue, as the latter contains a dynamic - // contract and therefore calls GetThread(), which would cause infinite recursion. - Thread::ShouldEnforceEEThreadNotRequiredContracts() && - - // The following verifies that it's safe to call GetClrDebugState() below without - // risk of its callees invoking extra error checking or fiber code that could - // recursively call GetThread() and overflow the stack - (CExecutionEngine::GetTlsData() != NULL)) - { - // It's safe to peek into the debug state, so let's do so, to see if - // our caller is really allowed to be calling GetThread(). This enforces - // the EE_THREAD_NOT_REQUIRED contract. - ClrDebugState * pDbg = GetClrDebugState(FALSE); // FALSE=don't allocate - if ((pDbg != NULL) && (!pDbg->IsGetThreadAllowed())) - { - // We need to bracket the ASSERTE with BEGIN/END_GETTHREAD_ALLOWED to avoid - // infinite recursion (see - // code:GetThreadGenericFullCheck#GTInfiniteRecursion). The ASSERTE here will - // cause us to reenter this function to get the thread (again). However, - // BEGIN/END_GETTHREAD_ALLOWED at least stops the recursion right then and - // there, as it prevents us from reentering this block yet again (since - // BEGIN/END_GETTHREAD_ALLOWED causes pDbg->IsGetThreadAllowed() to be TRUE). - // All such reentries to this function will quickly return the thread without - // executing the code below, so the original ASSERTE can proceed. - BEGIN_GETTHREAD_ALLOWED; - _ASSERTE(!"GetThread() called in a EE_THREAD_NOT_REQUIRED scope. 
If the GetThread() call site has a clear code path for a return of NULL, then consider using GetThreadNULLOk() or BEGIN/END_GETTHREAD_ALLOWED"); - END_GETTHREAD_ALLOWED; - } - } - - Thread * pThread = (Thread *) UnsafeTlsGetValue(gThreadTLSIndex); - - // set bogus last error to help find places that fail to save it across GetThread calls - ::SetLastError(LAST_ERROR_TRASH_VALUE); - - return pThread; -} - -#endif // ENABLE_GET_THREAD_GENERIC_FULL_CHECK - -#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_) -// -// Some platforms have this implemented in assembly -// -EXTERN_C Thread* STDCALL GetThreadGeneric(VOID); -EXTERN_C AppDomain* STDCALL GetAppDomainGeneric(VOID); -#else -Thread* STDCALL GetThreadGeneric() -{ - // Can not have contract here. Contract depends on GetThreadGeneric. - // Contract here causes stack overflow. - //CONTRACTL { - // NOTHROW; - // GC_NOTRIGGER; - //} - //CONTRACTL_END; - - // see code:GetThreadGenericFullCheck#GTInfiniteRecursion - _ASSERTE(ThreadInited()); - - Thread* pThread = (Thread*)UnsafeTlsGetValue(gThreadTLSIndex); - - TRASH_LASTERROR; - - return pThread; -} - -AppDomain* STDCALL GetAppDomainGeneric() -{ - // No contract. This function is called during ExitTask. - //CONTRACTL { - // NOTHROW; - // GC_NOTRIGGER; - //} - //CONTRACTL_END; - - _ASSERTE(ThreadInited()); - - AppDomain* pAppDomain = (AppDomain*)UnsafeTlsGetValue(GetAppDomainTLSIndex()); - - TRASH_LASTERROR; - - return pAppDomain; -} -#endif // defined(_TARGET_X86_) || defined(_TARGET_AMD64_) - -// -// FLS getter to avoid unnecessary indirection via execution engine. It will be used if we get high TLS slot -// from the OS where we cannot use the fast optimized assembly helpers. (It happens pretty often in hosted scenarios). -// -LPVOID* ClrFlsGetBlockDirect() -{ - LIMITED_METHOD_CONTRACT; - - return (LPVOID*)UnsafeTlsGetValue(CExecutionEngine::GetTlsIndex()); -} - -extern "C" void * ClrFlsGetBlock(); - -#endif // FEATURE_IMPLICIT_TLS - - extern "C" void STDCALL JIT_PatchedCodeStart(); extern "C" void STDCALL JIT_PatchedCodeLast(); @@ -1387,98 +1125,24 @@ void InitThreadManager() #ifndef FEATURE_PAL -#ifdef FEATURE_IMPLICIT_TLS _ASSERTE(GetThread() == NULL); - // Mscordbi calculates the address of currentThread pointer using OFFSETOF__TLS__tls_CurrentThread. Ensure that - // value is correct. + PTEB Teb = NtCurrentTeb(); + BYTE** tlsArray = (BYTE**)Teb->ThreadLocalStoragePointer; + BYTE* tlsData = (BYTE*)tlsArray[_tls_index]; - PTEB Teb; - BYTE* tlsData; - BYTE** tlsArray; + size_t offsetOfCurrentThreadInfo = (BYTE*)&gCurrentThreadInfo - tlsData; - Teb = NtCurrentTeb(); - tlsArray = (BYTE**)Teb->ThreadLocalStoragePointer; - tlsData = (BYTE*)tlsArray[_tls_index]; + _ASSERTE(offsetOfCurrentThreadInfo < 0x8000); + _ASSERTE(_tls_index < 0x10000); - Thread **ppThread = (Thread**) (tlsData + OFFSETOF__TLS__tls_CurrentThread); - _ASSERTE_ALL_BUILDS("clr/src/VM/Threads.cpp", - (&(gCurrentThreadInfo.m_pThread) == ppThread) && - "Offset of m_pThread as specified by OFFSETOF__TLS__tls_CurrentThread is not correct. " - "This can change due to addition/removal of declspec(Thread) thread local variables."); + // Save gCurrentThreadInfo location for debugger + g_TlsIndex = (DWORD)(_tls_index + (offsetOfCurrentThreadInfo << 16) + 0x80000000); - _ASSERTE_ALL_BUILDS("clr/src/VM/Threads.cpp", - ((BYTE*)&(gCurrentThreadInfo.m_EETlsData) == tlsData + OFFSETOF__TLS__tls_EETlsData) && - "Offset of m_EETlsData as specified by OFFSETOF__TLS__tls_EETlsData is not correct. 
" - "This can change due to addition/removal of declspec(Thread) thread local variables."); -#else - _ASSERTE(gThreadTLSIndex == TLS_OUT_OF_INDEXES); -#endif _ASSERTE(g_TrapReturningThreads == 0); #endif // !FEATURE_PAL - // Consult run-time switches that choose whether to use generic or optimized - // versions of GetThread and GetAppDomain - - BOOL fUseGenericTlsGetters = FALSE; - -#ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK - // Debug builds allow user to throw a switch to force use of the generic GetThread - // for the sole purpose of enforcing EE_THREAD_NOT_REQUIRED contracts - if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_EnforceEEThreadNotRequiredContracts) != 0) - { - // Set this static on Thread so this value can be safely read later on by - // code:GetThreadGenericFullCheck - Thread::s_fEnforceEEThreadNotRequiredContracts = TRUE; - - fUseGenericTlsGetters = TRUE; - } -#endif - -#ifndef FEATURE_IMPLICIT_TLS - // Now, we setup GetThread and GetAppDomain to point to their optimized or generic versions. Irrespective - // of the version they call into, we write opcode sequence into the dummy GetThread/GetAppDomain - // implementations (living in jithelp.s/.asm) via the MakeOptimizedTlsGetter calls below. - // - // For this to work, we must ensure that the dummy versions lie between the JIT_PatchedCodeStart - // and JIT_PatchedCodeLast code range (which lies in the .text section) so that when we change the protection - // above, we do so for GetThread and GetAppDomain as well. - - //--------------------------------------------------------------------------- - // INITIALIZE GetThread - //--------------------------------------------------------------------------- - - // No backout necessary - part of the one time global initialization - gThreadTLSIndex = UnsafeTlsAlloc(); - if (gThreadTLSIndex == TLS_OUT_OF_INDEXES) - COMPlusThrowWin32(); - - MakeOptimizedTlsGetter(gThreadTLSIndex, (PVOID)GetThread, TLS_GETTER_MAX_SIZE, (POPTIMIZEDTLSGETTER)GetThreadGeneric, fUseGenericTlsGetters); - - //--------------------------------------------------------------------------- - // INITIALIZE GetAppDomain - //--------------------------------------------------------------------------- - - // No backout necessary - part of the one time global initialization - gAppDomainTLSIndex = UnsafeTlsAlloc(); - if (gAppDomainTLSIndex == TLS_OUT_OF_INDEXES) - COMPlusThrowWin32(); - - MakeOptimizedTlsGetter(gAppDomainTLSIndex, (PVOID)GetAppDomain, TLS_GETTER_MAX_SIZE, (POPTIMIZEDTLSGETTER)GetAppDomainGeneric, fUseGenericTlsGetters); - - //--------------------------------------------------------------------------- - // Switch general purpose TLS getter to more efficient one if possible - //--------------------------------------------------------------------------- - - // Make sure that the TLS index is allocated - CExecutionEngine::CheckThreadState(0, FALSE); - - DWORD masterSlotIndex = CExecutionEngine::GetTlsIndex(); - CLRFLSGETBLOCK pGetter = (CLRFLSGETBLOCK)MakeOptimizedTlsGetter(masterSlotIndex, (PVOID)ClrFlsGetBlock, TLS_GETTER_MAX_SIZE); - __ClrFlsGetBlock = pGetter ? 
pGetter : ClrFlsGetBlockDirect;
-#else
     __ClrFlsGetBlock = CExecutionEngine::GetTlsData;
-#endif // FEATURE_IMPLICIT_TLS
 
     IfFailThrow(Thread::CLRSetThreadStackGuarantee(Thread::STSGuarantee_Force));
 
diff --git a/src/vm/threads.h b/src/vm/threads.h
index 17cc1f305d0e..05e01b300432 100644
--- a/src/vm/threads.h
+++ b/src/vm/threads.h
@@ -586,8 +586,6 @@ enum ThreadpoolThreadType
 //
 // Public functions for ASM code generators
 //
-// int GetThreadTLSIndex() - returns TLS index used to point to Thread
-// int GetAppDomainTLSIndex() - returns TLS index used to point to AppDomain
 // Thread* __stdcall CreateThreadBlockThrow() - creates new Thread on reverse p-invoke
 //
 // Public functions for one-time init/cleanup
@@ -629,14 +627,6 @@ Thread* SetupThreadNoThrow(HRESULT *phresult = NULL);
 Thread* SetupUnstartedThread(BOOL bRequiresTSL=TRUE);
 void DestroyThread(Thread *th);
 
-
-//---------------------------------------------------------------------------
-//---------------------------------------------------------------------------
-#ifndef FEATURE_IMPLICIT_TLS
-DWORD GetThreadTLSIndex();
-DWORD GetAppDomainTLSIndex();
-#endif
-
 DWORD GetRuntimeId();
 
 EXTERN_C Thread* WINAPI CreateThreadBlockThrow();
 
@@ -3891,23 +3881,6 @@ class Thread: public IUnknown
     ULONG m_ulEnablePreemptiveGCCount;
 #endif // _DEBUG
 
-#ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK
-
-private:
-    // Set once on initialization, single-threaded, inside friend code:InitThreadManager,
-    // based on whether the user has set COMPlus_EnforceEEThreadNotRequiredContracts.
-    // This is then later accessed via public
-    // code:Thread::ShouldEnforceEEThreadNotRequiredContracts. See
-    // code:GetThreadGenericFullCheck for details.
-    static BOOL s_fEnforceEEThreadNotRequiredContracts;
-
-public:
-    static BOOL ShouldEnforceEEThreadNotRequiredContracts();
-
-#endif // ENABLE_GET_THREAD_GENERIC_FULL_CHECK
-
-
 private:
     // For suspends:
     CLREvent m_DebugSuspendEvent;
 
@@ -7386,7 +7359,6 @@ inline void SetTypeHandleOnThreadForAlloc(TypeHandle th)
 
 #endif // CROSSGEN_COMPILE
 
-#ifdef FEATURE_IMPLICIT_TLS
 class Compiler;
 // users of OFFSETOF__TLS__tls_CurrentThread macro expect the offset of these variables wrt to _tls_start to be stable.
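The InitThreadManager hunk above publishes a single DWORD, g_TlsIndex, for debugger consumers: the OS-assigned _tls_index sits in the low 16 bits, the byte offset of gCurrentThreadInfo within the module's TLS block sits in bits 16 through 30, and the top bit is set by the 0x80000000 term. A hedged sketch of that packing and the matching unpacking, with field widths taken from the asserts in the hunk; the helper names below are illustrative, not runtime functions.

#include <cstdint>
#include <cassert>

// Mirror of the encoding used above: tlsIndex must fit in 16 bits and offset in
// 15 bits, which the runtime asserts before publishing the combined value.
static uint32_t PackTlsInfo(uint32_t tlsIndex, uint32_t offset)
{
    assert(tlsIndex < 0x10000 && offset < 0x8000);
    return tlsIndex + (offset << 16) + 0x80000000u;
}

// A debugger-side consumer would reverse the packing like this, then locate the
// thread-info block at tlsArray[tlsIndex] + offset.
static void UnpackTlsInfo(uint32_t packed, uint32_t* tlsIndex, uint32_t* offset)
{
    *tlsIndex = packed & 0xFFFF;
    *offset   = (packed >> 16) & 0x7FFF;
}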
 // Defining each of the following thread local variable separately without the struct causes the offsets to change in
@@ -7398,11 +7370,7 @@ struct ThreadLocalInfo
     Thread* m_pThread;
     AppDomain* m_pAppDomain;
     void** m_EETlsData; // ClrTlsInfo::data
-#ifdef FEATURE_MERGE_JIT_AND_ENGINE
-    void* m_pJitTls;
-#endif
 };
-#endif // FEATURE_IMPLICIT_TLS
 
 class ThreadStateHolder
 {
diff --git a/src/vm/threads.inl b/src/vm/threads.inl
index ee2aaacf94ea..f5a439c350b1 100644
--- a/src/vm/threads.inl
+++ b/src/vm/threads.inl
@@ -22,7 +22,6 @@
 #include "frames.h"
 
 #ifndef DACCESS_COMPILE
 
-#ifdef FEATURE_IMPLICIT_TLS
 #ifndef __llvm__
 EXTERN_C __declspec(thread) ThreadLocalInfo gCurrentThreadInfo;
@@ -40,18 +39,8 @@ EXTERN_C inline AppDomain* STDCALL GetAppDomain()
     return gCurrentThreadInfo.m_pAppDomain;
 }
-#endif // FEATURE_IMPLICIT_TLS
 #endif // !DACCESS_COMPILE
 
-#ifdef ENABLE_GET_THREAD_GENERIC_FULL_CHECK
-// See code:GetThreadGenericFullCheck
-inline /* static */ BOOL Thread::ShouldEnforceEEThreadNotRequiredContracts()
-{
-    LIMITED_METHOD_CONTRACT;
-    return s_fEnforceEEThreadNotRequiredContracts;
-}
-#endif // ENABLE_GET_THREAD_GENERIC_FULL_CHECK
-
 inline void Thread::IncLockCount()
 {
     LIMITED_METHOD_CONTRACT;
diff --git a/src/vm/vars.cpp b/src/vm/vars.cpp
index ff941d2101f7..464560c3a65d 100644
--- a/src/vm/vars.cpp
+++ b/src/vm/vars.cpp
@@ -120,13 +120,10 @@ GPTR_IMPL_INIT(StressLog, g_pStressLog, &StressLog::theLog);
 GPTR_IMPL(RCWCleanupList,g_pRCWCleanupList);
 #endif // FEATURE_COMINTEROP
 
+GVAL_IMPL_INIT(DWORD, g_TlsIndex, TLS_OUT_OF_INDEXES);
 
 #ifndef DACCESS_COMPILE
-// @TODO Remove eventually - determines whether the verifier throws an exception when something fails
-bool g_fVerifierOff;
-
-
 // @TODO - PROMOTE.
 OBJECTHANDLE g_pPreallocatedOutOfMemoryException;
 OBJECTHANDLE g_pPreallocatedStackOverflowException;
diff --git a/src/vm/vars.hpp b/src/vm/vars.hpp
index c9f48486924a..a7b6fb617353 100644
--- a/src/vm/vars.hpp
+++ b/src/vm/vars.hpp
@@ -406,9 +406,7 @@ GPTR_DECL(MethodDesc, g_pExecuteBackoutCodeHelperMethod);
 
 GPTR_DECL(MethodDesc, g_pObjectFinalizerMD);
 
-// @TODO Remove eventually - determines whether the verifier throws an exception when something fails
-EXTERN bool g_fVerifierOff;
-
+GVAL_DECL(DWORD, g_TlsIndex);
 
 // Global System Information
 extern SYSTEM_INFO g_SystemInfo;
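The threads.h comment above survives because mscordbi locates the current Thread through the OFFSETOF__TLS__tls_CurrentThread constant, and the runtime validates the layout at startup (the _ASSERTE_ALL_BUILDS checks visible in the threads.cpp hunk). Below is a hypothetical compile-time variant of that guard, written against a standalone copy of the struct layout since the real constants live in the runtime's own headers; it is a sketch, not code from the patch.

#include <cstddef>

// Standalone copy of the ThreadLocalInfo layout, used only for illustration.
struct ThreadLocalInfoSketch
{
    void*  m_pThread;
    void*  m_pAppDomain;
    void** m_EETlsData;
};

// If a member is reordered or a field is inserted ahead of these, the offsets a
// debugger bakes in would silently change; a build-time check makes that loud.
static_assert(offsetof(ThreadLocalInfoSketch, m_pThread) == 0,
              "m_pThread must stay first so the OFFSETOF__TLS__tls_CurrentThread consumers hold");
static_assert(offsetof(ThreadLocalInfoSketch, m_EETlsData) == 2 * sizeof(void*),
              "m_EETlsData offset feeds OFFSETOF__TLS__tls_EETlsData consumers");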