Skip to content

Unify TLS destructor list implementations #116850

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 67 additions & 18 deletions library/std/src/sys/pal/common/thread_local/fast_local.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use super::lazy::LazyKeyInner;
use crate::cell::Cell;
use crate::sys::thread_local_dtor::register_dtor;
use crate::cell::{Cell, RefCell};
use crate::{fmt, mem, panic};

#[doc(hidden)]
@@ -39,13 +38,11 @@ pub macro thread_local_inner {

// Safety: Performs `drop_in_place(ptr as *mut $t)`, and requires
// all that comes with it.
unsafe extern "C" fn destroy(ptr: *mut $crate::primitive::u8) {
$crate::thread::local_impl::abort_on_dtor_unwind(|| {
let old_state = STATE.replace(2);
$crate::debug_assert_eq!(old_state, 1);
// Safety: safety requirement is passed on to caller.
unsafe { $crate::ptr::drop_in_place(ptr.cast::<$t>()); }
});
unsafe fn destroy(ptr: *mut $crate::primitive::u8) {
let old_state = STATE.replace(2);
$crate::debug_assert_eq!(old_state, 1);
// Safety: safety requirement is passed on to caller.
unsafe { $crate::ptr::drop_in_place(ptr.cast::<$t>()); }
}

unsafe {
@@ -155,8 +152,8 @@ impl<T> Key<T> {

// note that this is just a publicly-callable function only for the
// const-initialized form of thread locals, basically a way to call the
// free `register_dtor` function defined elsewhere in std.
pub unsafe fn register_dtor(a: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
// free `register_dtor` function.
pub unsafe fn register_dtor(a: *mut u8, dtor: unsafe fn(*mut u8)) {
unsafe {
register_dtor(a, dtor);
}
@@ -220,7 +217,7 @@ impl<T> Key<T> {
}
}

unsafe extern "C" fn destroy_value<T>(ptr: *mut u8) {
unsafe fn destroy_value<T>(ptr: *mut u8) {
let ptr = ptr as *mut Key<T>;

// SAFETY:
@@ -233,14 +230,66 @@ unsafe extern "C" fn destroy_value<T>(ptr: *mut u8) {
// `Option<T>` to `None`, and `dtor_state` to `RunningOrHasRun`. This
// causes future calls to `get` to run `try_initialize_drop` again,
// which will now fail, and return `None`.
//
// Wrap the call in a catch to ensure unwinding is caught in the event
// a panic takes place in a destructor.
if let Err(_) = panic::catch_unwind(panic::AssertUnwindSafe(|| unsafe {
unsafe {
let value = (*ptr).inner.take();
(*ptr).dtor_state.set(DtorState::RunningOrHasRun);
drop(value);
})) {
rtabort!("thread local panicked on drop");
}
}

// Per-thread list of pending destructors. Each entry pairs a pointer with the
// function that will be called on it; entries are pushed by `register_dtor`
// and popped (most recently registered first) by `run_dtors`.
#[thread_local]
static DTORS: RefCell<Vec<(*mut u8, unsafe fn(*mut u8))>> = RefCell::new(Vec::new());

/// Queues `dtor` to be called with `t` when `run_dtors` drains the current
/// thread's destructor list.
///
/// The platform guard is activated first so that `run_dtors` is actually
/// invoked on thread exit.
// This must never be inlined on Windows, as doing so may break in dylibs.
// See #44391.
#[cfg_attr(windows, inline(never))]
unsafe fn register_dtor(t: *mut u8, dtor: unsafe fn(*mut u8)) {
    // Make sure the destructors get run when the thread exits.
    crate::sys::thread_local_guard::activate();

    match DTORS.try_borrow_mut() {
        Ok(mut pending) => pending.push((t, dtor)),
        // A failed borrow means this function was entered reentrantly. The
        // only place where that can happen is inside the heap allocator,
        // which is currently forbidden.
        Err(_) => rtabort!("the global allocator may not register TLS destructors"),
    }
}

/// Runs every destructor registered on the current thread; the platform calls
/// this on thread exit.
///
/// The signature is shaped so that the function can be handed directly to
/// platform APIs as a callback. The argument is ignored.
///
/// # Safety
/// May only be called on thread exit. In particular, no thread locals may
/// currently be referenced.
pub unsafe extern "C" fn run_dtors(_unused: *mut u8) {
    // Unwinding out of this function is not allowed. The `extern "C"` ABI
    // already guarantees that, but catching the unwind here lets us print a
    // more helpful message.
    let outcome = panic::catch_unwind(|| {
        loop {
            // The `RefMut` guard is a temporary of this statement, so it is
            // released before the destructor runs. That allows destructors to
            // initialize (and thereby register) further TLS variables.
            let Some((data, destroy)) = DTORS.borrow_mut().pop() else {
                break;
            };

            unsafe {
                destroy(data);
            }
        }

        // Every destructor has run; release the list's allocation.
        drop(DTORS.replace(Vec::new()));
    });

    if outcome.is_err() {
        rtabort!("thread local panicked on drop");
    }
}
23 changes: 1 addition & 22 deletions library/std/src/sys/pal/common/thread_local/mod.rs
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@ cfg_if::cfg_if! {
#[doc(hidden)]
mod fast_local;
#[doc(hidden)]
pub use fast_local::{Key, thread_local_inner};
pub use fast_local::{Key, thread_local_inner, run_dtors};
} else {
#[doc(hidden)]
mod os_local;
@@ -101,24 +101,3 @@ mod lazy {
}
}
}

/// Invokes `f` in a context that must not unwind (for instance an
/// `extern "C" fn` declared in a user crate). Should `f` unwind regardless,
/// the runtime is aborted via `rtabort` with a message about a thread local
/// panicking on drop.
#[inline]
pub fn abort_on_dtor_unwind(f: impl FnOnce()) {
    /// Aborts the runtime when dropped. It is only ever dropped if `f` unwinds.
    struct DtorUnwindGuard;

    impl Drop for DtorUnwindGuard {
        #[inline]
        fn drop(&mut self) {
            // The message is terse on purpose: by the time we get here the
            // panic message itself has already been printed.
            rtabort!("thread local panicked on drop");
        }
    }

    // Using a drop guard like this is lower cost.
    let abort_on_unwind = DtorUnwindGuard;
    f();
    // `f` returned normally, so defuse the guard without running its `Drop`.
    core::mem::forget(abort_on_unwind);
}
6 changes: 3 additions & 3 deletions library/std/src/sys/pal/hermit/mod.rs
Original file line number Diff line number Diff line change
@@ -34,7 +34,7 @@ pub mod pipe;
pub mod process;
pub mod stdio;
pub mod thread;
pub mod thread_local_dtor;
pub mod thread_local_guard;
#[path = "../unsupported/thread_local_key.rs"]
pub mod thread_local_key;
pub mod time;
@@ -109,7 +109,7 @@ pub unsafe extern "C" fn runtime_entry(
argv: *const *const c_char,
env: *const *const c_char,
) -> ! {
use thread_local_dtor::run_dtors;
use crate::sys::common::thread_local::run_dtors;
extern "C" {
fn main(argc: isize, argv: *const *const c_char) -> i32;
}
@@ -119,7 +119,7 @@ pub unsafe extern "C" fn runtime_entry(

let result = main(argc as isize, argv);

run_dtors();
run_dtors(crate::ptr::null_mut());
abi::exit(result);
}

3 changes: 2 additions & 1 deletion library/std/src/sys/pal/hermit/thread.rs
Original file line number Diff line number Diff line change
@@ -7,6 +7,7 @@ use crate::io;
use crate::mem;
use crate::num::NonZero;
use crate::ptr;
use crate::sys::common::thread_local::run_dtors;
use crate::time::Duration;

pub type Tid = abi::Tid;
@@ -50,7 +51,7 @@ impl Thread {
Box::from_raw(ptr::from_exposed_addr::<Box<dyn FnOnce()>>(main).cast_mut())();

// run all destructors
run_dtors();
run_dtors(ptr::null_mut());
}
}
}
29 changes: 0 additions & 29 deletions library/std/src/sys/pal/hermit/thread_local_dtor.rs

This file was deleted.

6 changes: 6 additions & 0 deletions library/std/src/sys/pal/hermit/thread_local_guard.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#![cfg(target_thread_local)]
#![unstable(feature = "thread_local_internals", issue = "none")]

/// Ensures that thread-local destructors are run on thread exit.
///
/// This is a no-op on this platform: nothing has to be registered.
pub fn activate() {
// run_dtors is always executed by the threading support.
}
6 changes: 3 additions & 3 deletions library/std/src/sys/pal/itron/thread.rs
Original file line number Diff line number Diff line change
@@ -12,9 +12,9 @@ use crate::{
hint, io,
mem::ManuallyDrop,
num::NonZero,
ptr::NonNull,
ptr::{self, NonNull},
sync::atomic::{AtomicUsize, Ordering},
sys::thread_local_dtor::run_dtors,
sys::common::thread_local::run_dtors,
time::Duration,
};

@@ -116,7 +116,7 @@ impl Thread {

// Run TLS destructors now because they are not
// called automatically for terminated tasks.
unsafe { run_dtors() };
unsafe { run_dtors(ptr::null_mut()) };

let old_lifecycle = inner
.lifecycle
2 changes: 1 addition & 1 deletion library/std/src/sys/pal/solid/mod.rs
Original file line number Diff line number Diff line change
@@ -36,7 +36,7 @@ pub mod process;
pub mod stdio;
pub use self::itron::thread;
pub mod memchr;
pub mod thread_local_dtor;
pub mod thread_local_guard;
pub mod thread_local_key;
pub use self::itron::thread_parking;
pub mod time;
43 changes: 0 additions & 43 deletions library/std/src/sys/pal/solid/thread_local_dtor.rs

This file was deleted.

21 changes: 21 additions & 0 deletions library/std/src/sys/pal/solid/thread_local_guard.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//! Ensures that thread-local destructors are run on thread exit.
#![cfg(target_thread_local)]
#![unstable(feature = "thread_local_internals", issue = "none")]

use super::{abi, itron::task};
use crate::cell::Cell;
use crate::sys::common::thread_local::run_dtors;

#[thread_local]
static REGISTERED: Cell<bool> = Cell::new(false);

/// Makes sure `run_dtors` is registered as a TLS destructor for the current
/// task. The `REGISTERED` flag limits this to one registration per thread.
pub fn activate() {
    if REGISTERED.get() {
        return;
    }

    let current = task::current_task_id_aborting();
    // Register `run_dtors` to make sure the TLS destructors are also called
    // for tasks created by other means than `std::thread`.
    unsafe { abi::SOLID_TLS_AddDestructor(current as i32, run_dtors) };
    REGISTERED.set(true);
}
2 changes: 1 addition & 1 deletion library/std/src/sys/pal/unix/mod.rs
Original file line number Diff line number Diff line change
@@ -33,7 +33,7 @@ pub mod rand;
pub mod stack_overflow;
pub mod stdio;
pub mod thread;
pub mod thread_local_dtor;
pub mod thread_local_guard;
pub mod thread_local_key;
pub mod thread_parking;
pub mod time;
97 changes: 44 additions & 53 deletions library/std/src/sys/pal/unix/thread_local_dtor.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
//! Ensures that thread-local destructors are run on thread exit.
#![cfg(target_thread_local)]
#![unstable(feature = "thread_local_internals", issue = "none")]

//! Provides thread-local destructors without an associated "key", which
//! can be more efficient.
use crate::ptr;
use crate::sys::common::thread_local::run_dtors;

// Since what appears to be glibc 2.18 this symbol has been shipped which
// GCC and clang both use to invoke destructors in thread_local globals, so
@@ -23,9 +25,10 @@
// FIXME: The Rust compiler currently omits weakly linked function definitions
// (i.e., __cxa_thread_atexit_impl) and their metadata from LLVM IR.
#[no_sanitize(cfi, kcfi)]
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
pub fn activate() {
use crate::cell::Cell;
use crate::mem;
use crate::sys_common::thread_local_dtor::register_dtor_fallback;
use crate::sys_common::thread_local_key::StaticKey;

/// This is necessary because the __cxa_thread_atexit_impl implementation
/// std links to by default may be a C or C++ implementation that was not
@@ -50,64 +53,47 @@ pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
>;
}

if let Some(f) = __cxa_thread_atexit_impl {
unsafe {
f(
mem::transmute::<
unsafe extern "C" fn(*mut u8),
unsafe extern "C" fn(*mut libc::c_void),
>(dtor),
t.cast(),
&__dso_handle as *const _ as *mut _,
);
unsafe {
if let Some(atexit) = __cxa_thread_atexit_impl {
#[thread_local]
static REGISTERED: Cell<bool> = Cell::new(false);
if !REGISTERED.get() {
atexit(
mem::transmute::<
unsafe extern "C" fn(*mut u8),
unsafe extern "C" fn(*mut libc::c_void),
>(run_dtors),
ptr::null_mut(),
&__dso_handle as *const _ as *mut _,
);
REGISTERED.set(true);
}
} else {
static KEY: StaticKey = StaticKey::new(Some(run_dtors));

KEY.set(ptr::invalid_mut(1));
}
return;
}
register_dtor_fallback(t, dtor);
}

// This implementation is very similar to register_dtor_fallback in
// sys_common/thread_local.rs. The main difference is that we want to hook into
// macOS's analog of the above linux function, _tlv_atexit. OSX will run the
// registered dtors before any TLS slots get freed, and when the main thread
// We hook into macOS's analog of the above linux function, _tlv_atexit. OSX
// will run `run_dtors` before any TLS slots get freed, and when the main thread
// exits.
//
// Unfortunately, calling _tlv_atexit while tls dtors are running is UB. The
// workaround below is to register, via _tlv_atexit, a custom DTOR list once per
// thread. thread_local dtors are pushed to the DTOR list without calling
// _tlv_atexit.
#[cfg(any(target_os = "macos", target_os = "ios", target_os = "watchos", target_os = "tvos"))]
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
use crate::cell::{Cell, RefCell};
use crate::ptr;

#[thread_local]
static REGISTERED: Cell<bool> = Cell::new(false);

#[thread_local]
static DTORS: RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> = RefCell::new(Vec::new());

if !REGISTERED.get() {
_tlv_atexit(run_dtors, ptr::null_mut());
REGISTERED.set(true);
}
pub fn activate() {
use crate::cell::Cell;

extern "C" {
fn _tlv_atexit(dtor: unsafe extern "C" fn(*mut u8), arg: *mut u8);
}

match DTORS.try_borrow_mut() {
Ok(mut dtors) => dtors.push((t, dtor)),
Err(_) => rtabort!("global allocator may not use TLS"),
}
#[thread_local]
static REGISTERED: Cell<bool> = Cell::new(false);

unsafe extern "C" fn run_dtors(_: *mut u8) {
let mut list = DTORS.take();
while !list.is_empty() {
for (ptr, dtor) in list {
dtor(ptr);
}
list = DTORS.take();
if !REGISTERED.get() {
unsafe {
_tlv_atexit(run_dtors, ptr::null_mut());
REGISTERED.set(true);
}
}
}
@@ -120,7 +106,12 @@ pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
target_os = "freebsd",
))]
#[cfg_attr(target_family = "wasm", allow(unused))] // might remain unused depending on target details (e.g. wasm32-unknown-emscripten)
pub unsafe fn register_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
use crate::sys_common::thread_local_dtor::register_dtor_fallback;
register_dtor_fallback(t, dtor);
pub fn activate() {
use crate::sys_common::thread_local_key::StaticKey;

static KEY: StaticKey = StaticKey::new(Some(run_dtors));

unsafe {
KEY.set(ptr::invalid_mut(1));
}
}
118 changes: 118 additions & 0 deletions library/std/src/sys/pal/unix/thread_local_guard.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
//! Ensures that thread-local destructors are run on thread exit.
#![cfg(target_thread_local)]
#![unstable(feature = "thread_local_internals", issue = "none")]

use crate::ptr;
use crate::sys::common::thread_local::run_dtors;

// Since what appears to be glibc 2.18 this symbol has been shipped which
// GCC and clang both use to invoke destructors in thread_local globals, so
// let's do the same!
//
// Note, however, that we run on lots older linuxes, as well as cross
// compiling from a newer linux to an older linux, so we also have a
// fallback implementation to use as well.
#[cfg_attr(bootstrap, allow(unexpected_cfgs))]
#[cfg(any(
    target_os = "linux",
    target_os = "android",
    target_os = "fuchsia",
    target_os = "redox",
    target_os = "hurd",
    target_os = "freebsd",
    target_os = "netbsd",
    target_os = "dragonfly"
))]
// FIXME: The Rust compiler currently omits weakly linked function definitions
// (i.e., __cxa_thread_atexit_impl) and their metadata from LLVM IR.
#[no_sanitize(cfi, kcfi)]
pub fn activate() {
    use crate::cell::Cell;
    use crate::mem;
    use crate::sys_common::thread_local_key::StaticKey;

    /// This is necessary because the __cxa_thread_atexit_impl implementation
    /// std links to by default may be a C or C++ implementation that was not
    /// compiled using the Clang integer normalization option.
    #[cfg(sanitizer_cfi_normalize_integers)]
    use core::ffi::c_int;
    #[cfg(not(sanitizer_cfi_normalize_integers))]
    #[cfi_encoding = "i"]
    #[repr(transparent)]
    pub struct c_int(pub libc::c_int);

    extern "C" {
        #[linkage = "extern_weak"]
        static __dso_handle: *mut u8;
        #[linkage = "extern_weak"]
        static __cxa_thread_atexit_impl: Option<
            extern "C" fn(
                unsafe extern "C" fn(*mut libc::c_void),
                *mut libc::c_void,
                *mut libc::c_void,
            ) -> c_int,
        >;
    }

    // Per-thread flag: has `run_dtors` already been handed to
    // `__cxa_thread_atexit_impl` on this thread?
    #[thread_local]
    static REGISTERED: Cell<bool> = Cell::new(false);

    // Key-based fallback, used when the weak symbol did not resolve.
    static KEY: StaticKey = StaticKey::new(Some(run_dtors));

    unsafe {
        match __cxa_thread_atexit_impl {
            Some(atexit) => {
                // One registration per thread is enough.
                if REGISTERED.get() {
                    return;
                }

                // The callback only differs in the pointee type of its
                // (ignored) argument.
                let callback = mem::transmute::<
                    unsafe extern "C" fn(*mut u8),
                    unsafe extern "C" fn(*mut libc::c_void),
                >(run_dtors);
                atexit(callback, ptr::null_mut(), &__dso_handle as *const _ as *mut _);
                REGISTERED.set(true);
            }
            None => {
                // Store a non-null dummy value for the key.
                // NOTE(review): presumably needed so that the key's destructor
                // (`run_dtors`) is invoked at thread exit; confirm against
                // `StaticKey`.
                KEY.set(ptr::invalid_mut(1));
            }
        }
    }
}

// Apple platforms offer `_tlv_atexit`, the analog of the linux function used
// above. OSX will run `run_dtors` before any TLS slots get freed, and when the
// main thread exits.
#[cfg(any(target_os = "macos", target_os = "ios", target_os = "watchos", target_os = "tvos"))]
pub fn activate() {
    use crate::cell::Cell;

    extern "C" {
        fn _tlv_atexit(dtor: unsafe extern "C" fn(*mut u8), arg: *mut u8);
    }

    // Per-thread flag: has `run_dtors` already been registered on this thread?
    #[thread_local]
    static REGISTERED: Cell<bool> = Cell::new(false);

    if REGISTERED.get() {
        return;
    }

    unsafe {
        _tlv_atexit(run_dtors, ptr::null_mut());
    }
    REGISTERED.set(true);
}

// Key-based variant: `run_dtors` is installed as the destructor of a
// `StaticKey`.
#[cfg(any(
    target_os = "vxworks",
    target_os = "horizon",
    target_os = "emscripten",
    target_os = "aix"
))]
#[cfg_attr(target_family = "wasm", allow(unused))] // might remain unused depending on target details (e.g. wasm32-unknown-emscripten)
pub fn activate() {
    use crate::sys_common::thread_local_key::StaticKey;

    static DTOR_KEY: StaticKey = StaticKey::new(Some(run_dtors));

    // Store a non-null dummy value for the key.
    // NOTE(review): presumably needed so that the key's destructor
    // (`run_dtors`) is invoked at thread exit; confirm against `StaticKey`.
    unsafe {
        DTOR_KEY.set(ptr::invalid_mut(1));
    }
}
1 change: 1 addition & 0 deletions library/std/src/sys/pal/windows/c.rs
Original file line number Diff line number Diff line change
@@ -55,6 +55,7 @@ pub const EXIT_FAILURE: u32 = 1;

pub const CONDITION_VARIABLE_INIT: CONDITION_VARIABLE = CONDITION_VARIABLE { Ptr: ptr::null_mut() };
pub const SRWLOCK_INIT: SRWLOCK = SRWLOCK { Ptr: ptr::null_mut() };
#[cfg(not(target_thread_local))] // Only used by key-based TLS.
pub const INIT_ONCE_STATIC_INIT: INIT_ONCE = INIT_ONCE { Ptr: ptr::null_mut() };

// Some windows_sys types have different signs than the types we use.
2 changes: 1 addition & 1 deletion library/std/src/sys/pal/windows/mod.rs
Original file line number Diff line number Diff line change
@@ -28,7 +28,7 @@ pub mod process;
pub mod rand;
pub mod stdio;
pub mod thread;
pub mod thread_local_dtor;
pub mod thread_local_guard;
pub mod thread_local_key;
pub mod thread_parking;
pub mod time;
7 changes: 0 additions & 7 deletions library/std/src/sys/pal/windows/thread_local_dtor.rs

This file was deleted.

120 changes: 120 additions & 0 deletions library/std/src/sys/pal/windows/thread_local_guard.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
//! A TLS destructor system.
//!
//! Turns out, like pretty much everything, Windows is pretty close to the
//! functionality that Unix provides, but slightly different! In the case of
//! TLS, Windows does not provide an API to provide a destructor for a TLS
//! variable. This ends up being pretty crucial to this implementation, so we
//! need a way around this.
//!
//! The solution here ended up being a little obscure, but fear not, the
//! internet has informed me [1][2] that this solution is not unique (no way
//! I could have thought of it as well!). The key idea is to insert some hook
//! somewhere to run arbitrary code on thread termination. With this in place
//! we'll be able to run anything we like, including all TLS destructors!
//!
//! If you're looking at this code, and wondering "what is this doing?",
//! you're not alone! I'll try to break this down step by step:
//!
//! # What's up with CRT$XLB?
//!
//! For anything about TLS destructors to work on Windows, we have to be able
//! to run *something* when a thread exits. To do so, we place a very special
//! static in a very special location. If this is encoded in just the right
//! way, the kernel's loader is apparently nice enough to run some function
//! of ours whenever a thread exits! How nice of the kernel!
//!
//! Lots of detailed information can be found in source [1] above, but the
//! gist of it is that this is leveraging a feature of Microsoft's PE format
//! (executable format) which is not actually used by any compilers today.
//! This apparently translates to any callbacks in the ".CRT$XLB" section
//! being run on certain events.
//!
//! So after all that, we use the compiler's #[link_section] feature to place
//! a callback pointer into the magic section so it ends up being called.
//!
//! # What's up with this callback?
//!
//! The callback specified receives a number of parameters from... someone!
//! (the kernel? the runtime? I'm not quite sure!) There are a few events that
//! this gets invoked for, but we're currently only interested in when a
//! thread or a process "detaches" (exits). The process part happens for the
//! last thread and the thread part happens for any normal thread.
//!
//! # The article mentions weird stuff about "/INCLUDE"?
//!
//! It sure does! Specifically we're talking about this quote:
//!
//! > The Microsoft run-time library facilitates this process by defining a
//! > memory image of the TLS Directory and giving it the special name
//! > “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
//! > linker looks for this memory image and uses the data there to create the
//! > TLS Directory. Other compilers that support TLS and work with the
//! > Microsoft linker must use this same technique.
//!
//! Basically what this means is that if we want support for our TLS
//! destructors/our hook being called then we need to make sure the linker does
//! not omit this symbol. Otherwise it will omit it and our callback won't be
//! wired up.
//!
//! We don't actually use the `/INCLUDE` linker flag here like the article
//! mentions because the Rust compiler doesn't propagate linker flags, but
//! instead we use a shim function which performs a volatile 1-byte load from
//! the address of the symbol to ensure it sticks around.
//!
//! [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
//! [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42
#![unstable(feature = "thread_local_internals", issue = "none")]

use crate::ptr;
use crate::sync::atomic::{
AtomicBool,
Ordering::{Acquire, Relaxed},
};
use crate::sys::c;

// If the target uses native TLS, run its destructors.
#[cfg(target_thread_local)]
use crate::sys::common::thread_local::run_dtors;
// Otherwise, run the destructors for the key-based variant.
#[cfg(not(target_thread_local))]
use super::thread_local_key::run_dtors;

/// An optimization hint. The compiler is often smart enough to know if an atomic
/// is never set and can remove dead code based on that fact.
static HAS_DTORS: AtomicBool = AtomicBool::new(false);

/// Ensure that thread-locals are destroyed when the thread exits.
///
/// This sets `HAS_DTORS`, the flag `on_tls_callback` checks before it runs
/// any destructors.
pub fn activate() {
HAS_DTORS.store(true, Relaxed);
}

// Pointer to `on_tls_callback`, placed in the `.CRT$XLB` section so that the
// callback ends up being called (see the module documentation).
#[link_section = ".CRT$XLB"]
#[allow(dead_code, unused_variables)]
#[used] // we don't want LLVM eliminating this symbol for any reason, and
// when the symbol makes it to the linker the linker will take over
pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) =
on_tls_callback;

/// TLS callback referenced from the `.CRT$XLB` section via `p_thread_callback`.
///
/// Does nothing unless `activate` has set `HAS_DTORS`. Otherwise it runs the
/// thread-local destructors when a thread or the process detaches.
#[allow(dead_code, unused_variables)]
unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID) {
    // Shim that keeps `_tls_used` referenced; see the module documentation for
    // what this is doing. Note that we don't need this trickery on GNU
    // windows, just on MSVC.
    #[cfg(target_env = "msvc")]
    unsafe fn reference_tls_used() {
        extern "C" {
            static _tls_used: u8;
        }
        crate::intrinsics::volatile_load(&_tls_used);
    }
    #[cfg(not(target_env = "msvc"))]
    unsafe fn reference_tls_used() {}

    if HAS_DTORS.load(Acquire) {
        // Only thread and process detach events are of interest.
        let is_detach = dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH;
        if is_detach {
            run_dtors(ptr::null_mut());
        }

        reference_tls_used();
    }
}
169 changes: 6 additions & 163 deletions library/std/src/sys/pal/windows/thread_local_key.rs
Original file line number Diff line number Diff line change
@@ -1,89 +1,19 @@
#![cfg(not(target_thread_local))]

use crate::cell::UnsafeCell;
use crate::ptr;
use crate::sync::atomic::{
AtomicBool, AtomicPtr, AtomicU32,
AtomicPtr, AtomicU32,
Ordering::{AcqRel, Acquire, Relaxed, Release},
};
use crate::sys::c;

#[cfg(test)]
mod tests;

/// An optimization hint. The compiler is often smart enough to know if an atomic
/// is never set and can remove dead code based on that fact.
static HAS_DTORS: AtomicBool = AtomicBool::new(false);

// Using a per-thread list avoids the problems in synchronizing global state.
#[thread_local]
#[cfg(target_thread_local)]
static DESTRUCTORS: crate::cell::RefCell<Vec<(*mut u8, unsafe extern "C" fn(*mut u8))>> =
crate::cell::RefCell::new(Vec::new());

// Ensure this can never be inlined because otherwise this may break in dylibs.
// See #44391.
#[inline(never)]
#[cfg(target_thread_local)]
pub unsafe fn register_keyless_dtor(t: *mut u8, dtor: unsafe extern "C" fn(*mut u8)) {
match DESTRUCTORS.try_borrow_mut() {
Ok(mut dtors) => dtors.push((t, dtor)),
Err(_) => rtabort!("global allocator may not use TLS"),
}

HAS_DTORS.store(true, Relaxed);
}

#[inline(never)] // See comment above
#[cfg(target_thread_local)]
/// Runs destructors. This should not be called until thread exit.
unsafe fn run_keyless_dtors() {
// Drop all the destructors.
//
// Note: While this is potentially an infinite loop, it *should* be
// the case that this loop always terminates because we provide the
// guarantee that a TLS key cannot be set after it is flagged for
// destruction.
loop {
// Use a let-else binding to ensure the `RefCell` guard is dropped
// immediately. Otherwise, a panic would occur if a TLS destructor
// tries to access the list.
let Some((ptr, dtor)) = DESTRUCTORS.borrow_mut().pop() else {
break;
};
(dtor)(ptr);
}
// We're done so free the memory.
DESTRUCTORS.replace(Vec::new());
}

type Key = c::DWORD;
type Dtor = unsafe extern "C" fn(*mut u8);

// Turns out, like pretty much everything, Windows is pretty close the
// functionality that Unix provides, but slightly different! In the case of
// TLS, Windows does not provide an API to provide a destructor for a TLS
// variable. This ends up being pretty crucial to this implementation, so we
// need a way around this.
//
// The solution here ended up being a little obscure, but fear not, the
// internet has informed me [1][2] that this solution is not unique (no way
// I could have thought of it as well!). The key idea is to insert some hook
// somewhere to run arbitrary code on thread termination. With this in place
// we'll be able to run anything we like, including all TLS destructors!
//
// To accomplish this feat, we perform a number of threads, all contained
// within this module:
//
// * All TLS destructors are tracked by *us*, not the Windows runtime. This
// means that we have a global list of destructors for each TLS key that
// we know about.
// * When a thread exits, we run over the entire list and run dtors for all
// non-null keys. This attempts to match Unix semantics in this regard.
//
// For more details and nitty-gritty, see the code sections below!
//
// [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
// [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42

pub struct StaticKey {
/// The key value shifted up by one. Since TLS_OUT_OF_INDEXES == DWORD::MAX
/// is not a valid key value, this allows us to use zero as sentinel value
@@ -215,41 +145,10 @@ unsafe fn register_dtor(key: &'static StaticKey) {
Err(new) => head = new,
}
}
HAS_DTORS.store(true, Release);
super::thread_local_guard::activate();
}

// -------------------------------------------------------------------------
// Where the Magic (TM) Happens
//
// If you're looking at this code, and wondering "what is this doing?",
// you're not alone! I'll try to break this down step by step:
//
// # What's up with CRT$XLB?
//
// For anything about TLS destructors to work on Windows, we have to be able
// to run *something* when a thread exits. To do so, we place a very special
// static in a very special location. If this is encoded in just the right
// way, the kernel's loader is apparently nice enough to run some function
// of ours whenever a thread exits! How nice of the kernel!
//
// Lots of detailed information can be found in source [1] above, but the
// gist of it is that this is leveraging a feature of Microsoft's PE format
// (executable format) which is not actually used by any compilers today.
// This apparently translates to any callbacks in the ".CRT$XLB" section
// being run on certain events.
//
// So after all that, we use the compiler's #[link_section] feature to place
// a callback pointer into the magic section so it ends up being called.
//
// # What's up with this callback?
//
// The callback specified receives a number of parameters from... someone!
// (the kernel? the runtime? I'm not quite sure!) There are a few events that
// this gets invoked for, but we're currently only interested on when a
// thread or a process "detaches" (exits). The process part happens for the
// last thread and the thread part happens for any normal thread.
//
// # Ok, what's up with running all these destructors?
// What's up with running all these destructors?
//
// This will likely need to be improved over time, but this function
// attempts a "poor man's" destructor callback system. Once we've got a list
@@ -258,63 +157,7 @@ unsafe fn register_dtor(key: &'static StaticKey) {
// beforehand). We do this a few times in a loop to basically match Unix
// semantics. If we don't reach a fixed point after a short while then we just
// inevitably leak something most likely.
//
// # The article mentions weird stuff about "/INCLUDE"?
//
// It sure does! Specifically we're talking about this quote:
//
// The Microsoft run-time library facilitates this process by defining a
// memory image of the TLS Directory and giving it the special name
// “__tls_used” (Intel x86 platforms) or “_tls_used” (other platforms). The
// linker looks for this memory image and uses the data there to create the
// TLS Directory. Other compilers that support TLS and work with the
// Microsoft linker must use this same technique.
//
// Basically what this means is that if we want support for our TLS
// destructors/our hook being called then we need to make sure the linker does
// not omit this symbol. Otherwise it will omit it and our callback won't be
// wired up.
//
// We don't actually use the `/INCLUDE` linker flag here like the article
// mentions because the Rust compiler doesn't propagate linker flags, but
// instead we use a shim function which performs a volatile 1-byte load from
// the address of the symbol to ensure it sticks around.

#[link_section = ".CRT$XLB"]
#[allow(dead_code, unused_variables)]
#[used] // we don't want LLVM eliminating this symbol for any reason, and
// when the symbol makes it to the linker the linker will take over
pub static p_thread_callback: unsafe extern "system" fn(c::LPVOID, c::DWORD, c::LPVOID) =
on_tls_callback;

#[allow(dead_code, unused_variables)]
unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv: c::LPVOID) {
if !HAS_DTORS.load(Acquire) {
return;
}
if dwReason == c::DLL_THREAD_DETACH || dwReason == c::DLL_PROCESS_DETACH {
#[cfg(not(target_thread_local))]
run_dtors();
#[cfg(target_thread_local)]
run_keyless_dtors();
}

// See comments above for what this is doing. Note that we don't need this
// trickery on GNU windows, just on MSVC.
reference_tls_used();
#[cfg(target_env = "msvc")]
unsafe fn reference_tls_used() {
extern "C" {
static _tls_used: u8;
}
crate::intrinsics::volatile_load(&_tls_used);
}
#[cfg(not(target_env = "msvc"))]
unsafe fn reference_tls_used() {}
}

#[allow(dead_code)] // actually called below
unsafe fn run_dtors() {
pub(super) unsafe fn run_dtors(_ptr: *mut u8) {
for _ in 0..5 {
let mut any_run = false;

2 changes: 1 addition & 1 deletion library/std/src/sys_common/mod.rs
Original file line number Diff line number Diff line change
@@ -29,13 +29,13 @@ pub mod once;
pub mod process;
pub mod thread;
pub mod thread_info;
pub mod thread_local_dtor;
pub mod thread_parking;
pub mod wstr;
pub mod wtf8;

cfg_if::cfg_if! {
if #[cfg(target_os = "windows")] {
#[cfg(not(target_thread_local))]
pub use crate::sys::thread_local_key;
} else {
pub mod thread_local_key;
56 changes: 0 additions & 56 deletions library/std/src/sys_common/thread_local_dtor.rs

This file was deleted.

2 changes: 1 addition & 1 deletion library/std/src/thread/mod.rs
Original file line number Diff line number Diff line change
@@ -205,7 +205,7 @@ cfg_if::cfg_if! {
#[doc(hidden)]
#[unstable(feature = "thread_local_internals", issue = "none")]
pub mod local_impl {
pub use crate::sys::common::thread_local::{thread_local_inner, Key, abort_on_dtor_unwind};
pub use crate::sys::common::thread_local::{thread_local_inner, Key};
}
}
}
2 changes: 1 addition & 1 deletion src/tools/miri/src/shims/tls.rs
Original file line number Diff line number Diff line change
@@ -298,7 +298,7 @@ trait EvalContextPrivExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
return Ok(());
}
let thread_callback =
this.eval_windows("thread_local_key", "p_thread_callback").to_pointer(this)?;
this.eval_windows("thread_local_guard", "p_thread_callback").to_pointer(this)?;
let thread_callback = this.get_ptr_fn(thread_callback)?.as_instance()?;

// FIXME: Technically, the reason should be `DLL_PROCESS_DETACH` when the main thread exits