diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index db367e8d91..21aa47b5a7 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -86,6 +86,11 @@ jobs: - name: Run cargo test run: cargo test --features="alloc,defmt,mpmc_large,portable-atomic-critical-section,serde,ufmt,bytes,zeroize,embedded-io-v0.7" + - name: Run loom tests + run: cargo test -- loom + continue-on-error: true + env: + RUSTFLAGS: '--cfg loom' # Run cargo fmt --check style: name: style diff --git a/CHANGELOG.md b/CHANGELOG.md index c06dcfffe2..f8e345b529 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Implement `TryFrom` for `Deque` from array. - Switch from `serde` to `serde_core` for enabling faster compilations. - Implement `Zeroize` trait for all data structures with the `zeroize` feature to securely clear sensitive data from memory. +- `mpmc::Queue`: document non-lock free behaviour, and add loom tests ## [v0.9.1] - 2025-08-19 diff --git a/Cargo.toml b/Cargo.toml index 9fb63e5a66..638c9e313b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -56,11 +56,21 @@ zeroize = ["dep:zeroize"] # Enable larger MPMC sizes. mpmc_large = [] +# Enable crossbeam ArrayQueue MPMC implementation. +mpmc_crossbeam = ["dep:crossbeam-utils"] + +# Enable NBLFQ MPMC implementation. +mpmc_nblfq = [] + +# Enable wCQ MPMC implementation. +mpmc_wcq = ["dep:wcq"] + # Implement some alloc Vec interoperability alloc = [] nightly = [] + [dependencies] bytes = { version = "1", default-features = false, optional = true } portable-atomic = { version = "1.0", optional = true } @@ -71,6 +81,8 @@ ufmt-write = { version = "0.1", optional = true } defmt = { version = "1.0.1", optional = true } zeroize = { version = "1.8", optional = true, default-features = false, features = ["derive"] } embedded-io = { version = "0.7", optional = true } +crossbeam-utils = { version = "0.8", optional = true } +wcq = { version = "0.1", git = "https://codeberg.org/weathered-steel/lfqueue", optional = true } # for the pool module [target.'cfg(any(target_arch = "arm", target_pointer_width = "32", target_pointer_width = "64"))'.dependencies] @@ -79,6 +91,10 @@ stable_deref_trait = { version = "1", default-features = false } [dev-dependencies] critical-section = { version = "1.1", features = ["std"] } static_assertions = "1.1.0" +thread-priority = "3.0" + +[target.'cfg(loom)'.dependencies] +loom = "0.7.2" [package.metadata.docs.rs] features = [ @@ -93,3 +109,6 @@ features = [ # for the pool module targets = ["i686-unknown-linux-gnu"] rustdoc-args = ["--cfg", "docsrs"] + +[lints.rust] +unexpected_cfgs = { level = "warn", check-cfg = ['cfg(loom)'] } diff --git a/examples/mpmc_crossbeam.rs b/examples/mpmc_crossbeam.rs new file mode 100644 index 0000000000..073b055374 --- /dev/null +++ b/examples/mpmc_crossbeam.rs @@ -0,0 +1,12 @@ +use heapless::mpmc::Queue; + +fn main() { + let q = Queue::<_, 2>::new(); + for _ in 0..255 { + assert!(q.enqueue(0).is_ok()); + assert_eq!(q.dequeue(), Some(0)); + } + + // Queue is empty, this should not block forever. 
+ assert_eq!(q.dequeue(), None); +} diff --git a/examples/mpmc_nblfq.rs b/examples/mpmc_nblfq.rs new file mode 100644 index 0000000000..073b055374 --- /dev/null +++ b/examples/mpmc_nblfq.rs @@ -0,0 +1,12 @@ +use heapless::mpmc::Queue; + +fn main() { + let q = Queue::<_, 2>::new(); + for _ in 0..255 { + assert!(q.enqueue(0).is_ok()); + assert_eq!(q.dequeue(), Some(0)); + } + + // Queue is empty, this should not block forever. + assert_eq!(q.dequeue(), None); +} diff --git a/examples/mpmc_vyukov.rs b/examples/mpmc_vyukov.rs new file mode 100644 index 0000000000..073b055374 --- /dev/null +++ b/examples/mpmc_vyukov.rs @@ -0,0 +1,12 @@ +use heapless::mpmc::Queue; + +fn main() { + let q = Queue::<_, 2>::new(); + for _ in 0..255 { + assert!(q.enqueue(0).is_ok()); + assert_eq!(q.dequeue(), Some(0)); + } + + // Queue is empty, this should not block forever. + assert_eq!(q.dequeue(), None); +} diff --git a/examples/mpmc_wcq.rs b/examples/mpmc_wcq.rs new file mode 100644 index 0000000000..0521fee993 --- /dev/null +++ b/examples/mpmc_wcq.rs @@ -0,0 +1,12 @@ +use wcq::Queue; + +fn main() { + let q = Queue::<_, 2>::new(); + for _ in 0..255 { + assert!(q.enqueue(0).is_ok()); + assert_eq!(q.dequeue(), Some(0)); + } + + // Queue is empty, this should not block forever. + assert_eq!(q.dequeue(), None); +} diff --git a/src/c_string.rs b/src/c_string.rs index cf4ce3013c..6ff5ebfa4f 100644 --- a/src/c_string.rs +++ b/src/c_string.rs @@ -382,7 +382,7 @@ mod tests { let empty = CString::<1>::new(); assert_eq!(empty.as_c_str(), c""); - assert_eq!(empty.as_bytes(), &[]); + assert!(empty.as_bytes().is_empty()); assert_eq!(empty.to_str(), Ok("")); } diff --git a/src/history_buf.rs b/src/history_buf.rs index 67df4cd7ee..dbd170dd72 100644 --- a/src/history_buf.rs +++ b/src/history_buf.rs @@ -670,7 +670,7 @@ mod tests { assert!(x.is_full()); let x: HistoryBuf = HistoryBuf::new(); - assert_eq!(x.as_slice(), []); + assert!(x.as_slice().is_empty()); assert!(!x.is_full()); } @@ -694,7 +694,7 @@ mod tests { fn clear() { let mut x: HistoryBuf = HistoryBuf::new_with(1); x.clear(); - assert_eq!(x.as_slice(), []); + assert!(x.as_slice().is_empty()); let mut x: HistoryBuf = HistoryBuf::new(); x.clear_with(1); @@ -782,7 +782,7 @@ mod tests { fn as_slice() { let mut x: HistoryBuf = HistoryBuf::new(); - assert_eq!(x.as_slice(), []); + assert!(x.as_slice().is_empty()); x.extend([1, 2, 3, 4, 5].iter()); diff --git a/src/mpmc.rs b/src/mpmc.rs index f676c7c93a..ac25417c4f 100644 --- a/src/mpmc.rs +++ b/src/mpmc.rs @@ -61,6 +61,16 @@ //! - The numbers reported correspond to the successful path, i.e. `dequeue` returning `Some` //! and `enqueue` returning `Ok`. //! +//! +//!
+//!
+//! This implementation is not fully lock-free. If a thread or task gets preempted during
+//! a `dequeue` or `enqueue` operation, it may prevent other operations from succeeding
+//! until it is scheduled again and finishes that operation.
+//!
+//! See issue #583 for more details.
+//!
+//!
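As an illustration of what the warning above means for callers (a sketch that is not part of the patch, assuming a host target where `std` threads are available): `dequeue` can return `None` while another thread is still inside `enqueue`, so code that needs an item should retry rather than treat the first `None` as proof that the queue is empty.

```rust
use std::{sync::Arc, thread};

use heapless::mpmc::Queue;

fn main() {
    let q = Arc::new(Queue::<u8, 4>::new());

    let producer = {
        let q = Arc::clone(&q);
        thread::spawn(move || {
            // If this thread is preempted inside `enqueue`, a concurrent
            // `dequeue` can transiently return `None` even though an item
            // is already in flight.
            q.enqueue(42).unwrap();
        })
    };

    // Retry instead of treating the first `None` as "definitely empty".
    let item = loop {
        if let Some(v) = q.dequeue() {
            break v;
        }
        thread::yield_now();
    };
    assert_eq!(item, 42);

    producer.join().unwrap();
}
```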
//! # References //! //! This is an implementation of Dmitry Vyukov's [bounded MPMC queue], minus the @@ -68,16 +78,15 @@ //! //! [bounded MPMC queue]: http://www.1024cores.net/home/lock-free-algorithms/queues/bounded-mpmc-queue -use core::{cell::UnsafeCell, mem::MaybeUninit}; +#[cfg(loom)] +use loom::sync::atomic; -#[cfg(not(feature = "portable-atomic"))] +#[cfg(not(any(feature = "portable-atomic", loom)))] use core::sync::atomic; -#[cfg(feature = "portable-atomic")] +#[cfg(all(feature = "portable-atomic", not(loom)))] use portable_atomic as atomic; -use atomic::Ordering; - -use crate::storage::{OwnedStorage, Storage, ViewStorage}; +use crate::storage::ViewStorage; #[cfg(feature = "mpmc_large")] type AtomicTargetSize = atomic::AtomicUsize; @@ -90,264 +99,41 @@ type UintSize = usize; type UintSize = u8; #[cfg(feature = "mpmc_large")] +#[allow(unused)] type IntSize = isize; #[cfg(not(feature = "mpmc_large"))] +#[allow(unused)] type IntSize = i8; -/// Base struct for [`Queue`] and [`QueueView`], generic over the [`Storage`]. -/// -/// In most cases you should use [`Queue`] or [`QueueView`] directly. Only use this -/// struct if you want to write code that's generic over both. -pub struct QueueInner { - dequeue_pos: AtomicTargetSize, - enqueue_pos: AtomicTargetSize, - buffer: UnsafeCell>>, -} +#[cfg(all(not(feature = "mpmc_nblfq"), not(feature = "mpmc_crossbeam")))] +mod original; +#[cfg(all(not(feature = "mpmc_nblfq"), not(feature = "mpmc_crossbeam")))] +pub use original::*; -/// A statically allocated multi-producer, multi-consumer queue with a capacity of `N` elements. -/// -///
-/// -/// `N` must be a power of 2. -/// -///
-/// -/// The maximum value of `N` is 128 if the `mpmc_large` feature is not enabled. -pub type Queue = QueueInner>; +#[cfg(feature = "mpmc_crossbeam")] +mod crossbeam_array_queue; +#[cfg(feature = "mpmc_crossbeam")] +pub use crossbeam_array_queue::*; + +#[cfg(all(feature = "mpmc_nblfq", not(feature = "mpmc_crossbeam")))] +mod nblfq; +#[cfg(all(feature = "mpmc_nblfq", not(feature = "mpmc_crossbeam")))] +pub use nblfq::*; /// A [`Queue`] with dynamic capacity. /// /// [`Queue`] coerces to `QueueView`. `QueueView` is `!Sized`, meaning it can only ever be used by reference. pub type QueueView = QueueInner; -impl Queue { - /// Creates an empty queue. - pub const fn new() -> Self { - const { - assert!(N > 1); - assert!(N.is_power_of_two()); - assert!(N < UintSize::MAX as usize); - } - - let mut cell_count = 0; - - let mut result_cells: [Cell; N] = [const { Cell::new(0) }; N]; - while cell_count != N { - result_cells[cell_count] = Cell::new(cell_count); - cell_count += 1; - } - - Self { - buffer: UnsafeCell::new(result_cells), - dequeue_pos: AtomicTargetSize::new(0), - enqueue_pos: AtomicTargetSize::new(0), - } - } - - /// Used in `Storage` implementation. - pub(crate) fn as_view_private(&self) -> &QueueView { - self - } - /// Used in `Storage` implementation. - pub(crate) fn as_view_mut_private(&mut self) -> &mut QueueView { - self - } -} - -impl QueueInner { - /// Returns the maximum number of elements the queue can hold. - #[inline] - pub fn capacity(&self) -> usize { - S::len(self.buffer.get()) - } - - /// Get a reference to the `Queue`, erasing the `N` const-generic. - /// - /// - /// ```rust - /// # use heapless::mpmc::{Queue, QueueView}; - /// let queue: Queue = Queue::new(); - /// let view: &QueueView = queue.as_view(); - /// ``` - /// - /// It is often preferable to do the same through type coerction, since `Queue` implements `Unsize>`: - /// - /// ```rust - /// # use heapless::mpmc::{Queue, QueueView}; - /// let queue: Queue = Queue::new(); - /// let view: &QueueView = &queue; - /// ``` - #[inline] - pub fn as_view(&self) -> &QueueView { - S::as_mpmc_view(self) - } - - /// Get a mutable reference to the `Queue`, erasing the `N` const-generic. - /// - /// ```rust - /// # use heapless::mpmc::{Queue, QueueView}; - /// let mut queue: Queue = Queue::new(); - /// let view: &mut QueueView = queue.as_mut_view(); - /// ``` - /// - /// It is often preferable to do the same through type coerction, since `Queue` implements `Unsize>`: - /// - /// ```rust - /// # use heapless::mpmc::{Queue, QueueView}; - /// let mut queue: Queue = Queue::new(); - /// let view: &mut QueueView = &mut queue; - /// ``` - #[inline] - pub fn as_mut_view(&mut self) -> &mut QueueView { - S::as_mpmc_mut_view(self) - } - - fn mask(&self) -> UintSize { - (S::len(self.buffer.get()) - 1) as _ - } - - /// Returns the item in the front of the queue, or `None` if the queue is empty. - pub fn dequeue(&self) -> Option { - unsafe { dequeue(S::as_ptr(self.buffer.get()), &self.dequeue_pos, self.mask()) } - } - - /// Adds an `item` to the end of the queue. - /// - /// Returns back the `item` if the queue is full. - pub fn enqueue(&self, item: T) -> Result<(), T> { - unsafe { - enqueue( - S::as_ptr(self.buffer.get()), - &self.enqueue_pos, - self.mask(), - item, - ) - } - } -} - -impl Default for Queue { - fn default() -> Self { - Self::new() - } -} - -impl Drop for QueueInner { - fn drop(&mut self) { - // Drop all elements currently in the queue. 
- while self.dequeue().is_some() {} - } -} - -unsafe impl Sync for QueueInner where T: Send {} - -struct Cell { - data: MaybeUninit, - sequence: AtomicTargetSize, -} - -impl Cell { - const fn new(seq: usize) -> Self { - Self { - data: MaybeUninit::uninit(), - sequence: AtomicTargetSize::new(seq as UintSize), - } - } -} - -unsafe fn dequeue( - buffer: *mut Cell, - dequeue_pos: &AtomicTargetSize, - mask: UintSize, -) -> Option { - let mut pos = dequeue_pos.load(Ordering::Relaxed); - - let mut cell; - loop { - cell = buffer.add(usize::from(pos & mask)); - let seq = (*cell).sequence.load(Ordering::Acquire); - let dif = (seq as IntSize).wrapping_sub((pos.wrapping_add(1)) as IntSize); - - match dif.cmp(&0) { - core::cmp::Ordering::Equal => { - if dequeue_pos - .compare_exchange_weak( - pos, - pos.wrapping_add(1), - Ordering::Relaxed, - Ordering::Relaxed, - ) - .is_ok() - { - break; - } - } - core::cmp::Ordering::Less => { - return None; - } - core::cmp::Ordering::Greater => { - pos = dequeue_pos.load(Ordering::Relaxed); - } - } - } - - let data = (*cell).data.as_ptr().read(); - (*cell) - .sequence - .store(pos.wrapping_add(mask).wrapping_add(1), Ordering::Release); - Some(data) -} - -unsafe fn enqueue( - buffer: *mut Cell, - enqueue_pos: &AtomicTargetSize, - mask: UintSize, - item: T, -) -> Result<(), T> { - let mut pos = enqueue_pos.load(Ordering::Relaxed); - - let mut cell; - loop { - cell = buffer.add(usize::from(pos & mask)); - let seq = (*cell).sequence.load(Ordering::Acquire); - let dif = (seq as IntSize).wrapping_sub(pos as IntSize); - - match dif.cmp(&0) { - core::cmp::Ordering::Equal => { - if enqueue_pos - .compare_exchange_weak( - pos, - pos.wrapping_add(1), - Ordering::Relaxed, - Ordering::Relaxed, - ) - .is_ok() - { - break; - } - } - core::cmp::Ordering::Less => { - return Err(item); - } - core::cmp::Ordering::Greater => { - pos = enqueue_pos.load(Ordering::Relaxed); - } - } - } - - (*cell).data.as_mut_ptr().write(item); - (*cell) - .sequence - .store(pos.wrapping_add(1), Ordering::Release); - Ok(()) -} - +#[cfg(not(loom))] #[cfg(test)] mod tests { use static_assertions::assert_not_impl_any; use super::Queue; + const N: usize = 4; + // Ensure a `Queue` containing `!Send` values stays `!Send` itself. assert_not_impl_any!(Queue<*const (), 4>: Send); @@ -419,4 +205,210 @@ mod tests { // Queue is full, this should not block forever. 
q.enqueue(0x55).unwrap_err(); } + + #[test] + fn test_enqueue_contention_rt() { + use thread_priority::*; + + let q0 = std::sync::Arc::new(Queue::::new()); + + for i in 0..N { + q0.enqueue(i as u8).expect("new enqueue"); + } + + let model_thread = |q0: std::sync::Arc>| { + for k in 0..N { + match q0.dequeue() { + Some(_i) => (), + None if q0.is_empty() => (), + None => panic!( + "enqueue: Dequeue failed on iteration: {k}, empty queue?: {}, queue len: {}", + q0.is_empty(), + q0.len() + ), + }; + + q0.enqueue(k as u8).unwrap(); + } + }; + + let q1 = q0.clone(); + let h1 = ThreadBuilder::default() + .name("h1") + .priority(ThreadPriority::Max) + .policy(ThreadSchedulePolicy::Realtime( + RealtimeThreadSchedulePolicy::Fifo, + )) + .spawn(move |_| model_thread(q1)) + .unwrap(); + + let q2 = q0.clone(); + let h2 = ThreadBuilder::default() + .name("h2") + .priority(ThreadPriority::Max) + .policy(ThreadSchedulePolicy::Realtime( + RealtimeThreadSchedulePolicy::Fifo, + )) + .spawn(move |_| model_thread(q2)) + .unwrap(); + + h1.join().unwrap(); + h2.join().unwrap(); + } + + #[test] + fn test_dequeue_contention_rt() { + use thread_priority::*; + + let q0 = std::sync::Arc::new(Queue::::new()); + + let model_thread = |q0: std::sync::Arc>| { + for k in 0..N { + q0.enqueue(k as u8).unwrap(); + match q0.dequeue() { + Some(_i) => (), + None if q0.is_empty() => (), + None => { + panic!( + "dequeue: Dequeue failed on iteration: {k}, queue is empty?: {}, queue len: {}", + q0.is_empty(), + q0.len() + ); + } + } + } + }; + + let q1 = q0.clone(); + let h1 = ThreadBuilder::default() + .name("h1") + .priority(ThreadPriority::Max) + .policy(ThreadSchedulePolicy::Realtime( + RealtimeThreadSchedulePolicy::Fifo, + )) + .spawn(move |_| model_thread(q1)) + .unwrap(); + + let q2 = q0.clone(); + let h2 = ThreadBuilder::default() + .name("h2") + .priority(ThreadPriority::Max) + .policy(ThreadSchedulePolicy::Realtime( + RealtimeThreadSchedulePolicy::Fifo, + )) + .spawn(move |_| model_thread(q2)) + .unwrap(); + + h1.join().unwrap(); + h2.join().unwrap(); + } + + #[test] + fn test_issue_583_enqueue_rt() { + use thread_priority::*; + + fn to_vec(q: &Queue) -> Vec { + // inaccurate + let mut ret = vec![]; + while let Some(v) = q.dequeue() { + ret.push(v); + } + ret + } + + let q0 = std::sync::Arc::new(Queue::::new()); + + let model_thread = move |q0: std::sync::Arc>| { + for k in 0..1_000_000 { + if let Some(v) = q0.dequeue() { + q0.enqueue(v) + .unwrap_or_else(|v| panic!("{}: q0 -> q0: {}, {:?}", k, v, to_vec(&q0))); + } + } + }; + + let q1 = q0.clone(); + let h1 = ThreadBuilder::default() + .name("h1") + .priority(ThreadPriority::Max) + .policy(ThreadSchedulePolicy::Realtime( + RealtimeThreadSchedulePolicy::Fifo, + )) + .spawn(move |_| model_thread(q1)) + .unwrap(); + + let q2 = q0.clone(); + let h2 = ThreadBuilder::default() + .name("h2") + .priority(ThreadPriority::Max) + .policy(ThreadSchedulePolicy::Realtime( + RealtimeThreadSchedulePolicy::Fifo, + )) + .spawn(move |_| model_thread(q2)) + .unwrap(); + + h1.join().unwrap(); + h2.join().unwrap(); + } +} + +#[cfg(all(loom, test))] +mod tests_loom { + use super::*; + use std::sync::Arc; + const N: usize = 4; + + #[test] + #[cfg(loom)] + fn loom_issue_583_enqueue() { + loom::model(|| { + let q0 = Arc::new(Queue::::new()); + q0.enqueue(0).unwrap(); + q0.enqueue(1).unwrap(); + q0.enqueue(2).unwrap(); + q0.enqueue(3).unwrap(); + let model_thread = || { + let q0 = q0.clone(); + move || { + for k in 0..10 { + let Some(i) = q0.dequeue() else { + panic!("{k}"); + }; + if q0.enqueue(k 
as u8).is_err() { + panic!("{i}"); + } + } + } + }; + + let h1 = loom::thread::spawn(model_thread()); + let h2 = loom::thread::spawn(model_thread()); + h1.join().unwrap(); + h2.join().unwrap(); + }); + } + + #[test] + #[cfg(loom)] + fn loom_issue_583_dequeue() { + loom::model(|| { + let q0 = Arc::new(Queue::::new()); + let model_thread = || { + let q0 = q0.clone(); + move || { + for k in 0..10 { + q0.enqueue(k as u8).unwrap(); + if q0.dequeue().is_none() { + panic!("{k}"); + } + } + } + }; + + let h1 = loom::thread::spawn(model_thread()); + let h2 = loom::thread::spawn(model_thread()); + h1.join().unwrap(); + h2.join().unwrap(); + }); + } } diff --git a/src/mpmc/crossbeam_array_queue.rs b/src/mpmc/crossbeam_array_queue.rs new file mode 100644 index 0000000000..de91f99dbf --- /dev/null +++ b/src/mpmc/crossbeam_array_queue.rs @@ -0,0 +1,645 @@ +//! The implementation is based on Dmitry Vyukov's bounded MPMC queue. +//! +//! Source: +//! - +//! +//! From the [crossbeam-queue](https://github.com/crossbeam-rs/crossbeam/blob/master/crossbeam-queue/src/array_queue.rs) implementation. + +use super::{atomic, atomic::Ordering}; +use core::cell::UnsafeCell; +use core::fmt; +use core::mem::MaybeUninit; +use core::panic::{RefUnwindSafe, UnwindSafe}; + +use crate::storage::{OwnedStorage, Storage}; + +use crossbeam_utils::{Backoff, CachePadded}; + +use super::{AtomicTargetSize, QueueView, UintSize}; + +/// A slot in a queue. +struct Slot { + /// The current stamp. + /// + /// If the stamp equals the tail, this node will be next written to. If it equals head + 1, + /// this node will be next read from. + stamp: AtomicTargetSize, + + /// The value in this slot. + value: UnsafeCell>, +} + +impl Slot { + /// Creates a new uninitialized [Slot]. + #[cfg(not(loom))] + pub const fn new() -> Self { + Self { + stamp: AtomicTargetSize::new(0), + value: UnsafeCell::new(MaybeUninit::uninit()), + } + } + + /// Creates a new uninitialized [Slot]. + #[cfg(loom)] + pub fn new() -> Self { + Self { + stamp: AtomicTargetSize::new(0), + value: UnsafeCell::new(MaybeUninit::uninit()), + } + } + + /// Creates a new uninitialized [Slot] with the provided stamp. + #[cfg(not(loom))] + pub const fn create_uninit(stamp: UintSize) -> Self { + Self { + stamp: AtomicTargetSize::new(stamp), + value: UnsafeCell::new(MaybeUninit::uninit()), + } + } + + /// Creates a new uninitialized [Slot] with the provided stamp. + #[cfg(loom)] + pub fn create_uninit(stamp: UintSize) -> Self { + Self { + stamp: AtomicTargetSize::new(stamp), + value: UnsafeCell::new(MaybeUninit::uninit()), + } + } +} + +impl Default for Slot { + fn default() -> Self { + Self::new() + } +} + +/// A bounded multi-producer multi-consumer queue. +/// +/// This queue allocates a fixed-capacity buffer on construction, which is used to store pushed +/// elements. The queue cannot hold more elements than the buffer allows. Attempting to push an +/// element into a full queue will fail. Alternatively, [`force_push`] makes it possible for +/// this queue to be used as a ring-buffer. Having a buffer allocated upfront makes this queue +/// a bit faster than [`SegQueue`]. 
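+/// (In this port the ring-buffer behaviour is provided by `force_enqueue`, and
+/// `SegQueue` refers to the unbounded queue in `crossbeam-queue`.)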
+/// +/// [`force_push`]: Queue::force_push +/// [`SegQueue`]: super::SegQueue +/// +/// # Examples +/// +/// ``` +/// use heapless::mpmc::Queue; +/// const N: usize = 2; +/// +/// let q = Queue::new::(); +/// +/// assert_eq!(q.enqueue('a'), Ok(())); +/// assert_eq!(q.enqueue('b'), Ok(())); +/// assert_eq!(q.enqueue('c'), Err('c')); +/// assert_eq!(q.dequeue(), Some('a')); +/// ``` +pub type Queue = QueueInner>; + +/// Base struct for [`Queue`] and [`QueueView`], generic over the [`Storage`]. +/// +/// In most cases you should use [`Queue`] or [`QueueView`] directly. Only use this +/// struct if you want to write code that's generic over both. +pub struct QueueInner { + /// The head of the queue. + /// + /// This value is a "stamp" consisting of an index into the buffer and a lap, but packed into a + /// single `usize`. The lower bits represent the index, while the upper bits represent the lap. + /// + /// Elements are popped from the head of the queue. + head: CachePadded, + + /// The tail of the queue. + /// + /// This value is a "stamp" consisting of an index into the buffer and a lap, but packed into a + /// single `usize`. The lower bits represent the index, while the upper bits represent the lap. + /// + /// Elements are pushed into the tail of the queue. + tail: CachePadded, + + /// A stamp with the value of `{ lap: 1, index: 0 }`. + one_lap: UintSize, + + /// The buffer holding slots. + buffer: UnsafeCell>>, +} + +impl QueueInner { + /// Attempts to push an element into the queue. + /// + /// If the queue is full, the element is returned back as an error. + /// + /// # Examples + /// + /// ``` + /// use heapless::mpmc::Queue; + /// const N: usize = 1; + /// + /// let q = Queue::new::(); + /// + /// assert_eq!(q.enqueue(10), Ok(())); + /// assert_eq!(q.enqueue(20), Err(20)); + /// ``` + pub fn enqueue(&self, value: T) -> Result<(), T> { + self.push_or_else(value, |v, tail, _, _| { + let head = self.head.load(Ordering::Relaxed); + + // If the head lags one lap behind the tail as well... + if head.wrapping_add(self.one_lap) == tail as UintSize { + // ...then the queue is full. + Err(v) + } else { + Ok(v) + } + }) + } + + /// Returns the number of elements in the queue. + /// + /// # Examples + /// + /// ``` + /// use heapless::mpmc::Queue; + /// const N: usize = 100; + /// + /// let q = Queue::new::(); + /// assert_eq!(q.len(), 0); + /// + /// q.enqueue(10).unwrap(); + /// assert_eq!(q.len(), 1); + /// + /// q.enqueue(20).unwrap(); + /// assert_eq!(q.len(), 2); + /// ``` + pub fn len(&self) -> usize { + loop { + // Load the tail, then load the head. + let tail = self.tail.load(Ordering::SeqCst); + let head = self.head.load(Ordering::SeqCst); + + // If the tail didn't change, we've got consistent values to work with + + if self.tail.load(Ordering::SeqCst) == tail { + let hix = head & (self.one_lap - 1); + let tix = tail & (self.one_lap - 1); + + return if hix < tix { + usize::from(tix - hix) + } else if hix > tix { + self.capacity() - usize::from(hix + tix) + } else if tail == head { + 0 + } else { + self.capacity() + }; + } + } + } + + fn as_ptr(&self) -> *const Slot { + S::as_ptr(self.buffer.get() as *mut S::Buffer>) as *const _ + } + + fn push_or_else(&self, mut value: T, f: F) -> Result<(), T> + where + F: Fn(T, UintSize, UintSize, &Slot) -> Result, + { + let backoff = Backoff::new(); + let mut tail = self.tail.load(Ordering::Relaxed); + + loop { + // Deconstruct the tail. 
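+            // A stamp packs `{ lap, index }` into a single word: the bits
+            // below `one_lap` hold the buffer index, the bits above count
+            // completed laps around the ring.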
+ let lap_mask = self.one_lap.wrapping_sub(1); + let index = usize::from(tail & lap_mask); + let lap = tail & !lap_mask; + + let new_tail = if index + 1 < self.capacity() { + // Same lap, incremented index. + // Set to `{ lap: lap, index: index + 1 }`. + tail + 1 + } else { + // One lap forward, index wraps around to zero. + // Set to `{ lap: lap.wrapping_add(1), index: 0 }`. + lap.wrapping_add(self.one_lap) + }; + + // Inspect the corresponding slot. + debug_assert!(index < self.capacity()); + // SAFETY: index is a valid offset, and buffer is valid contiguous memory. + let slot = unsafe { &*self.as_ptr().add(index) }; + let stamp = slot.stamp.load(Ordering::Acquire); + + // If the tail and the stamp match, we may attempt to push. + if tail == stamp { + // Try moving the tail. + match self.tail.compare_exchange_weak( + tail, + new_tail, + Ordering::SeqCst, + Ordering::Relaxed, + ) { + Ok(_) => { + // Write the value into the slot and update the stamp. + unsafe { + slot.value.get().write(MaybeUninit::new(value)); + } + slot.stamp.store(tail + 1, Ordering::Release); + return Ok(()); + } + Err(t) => { + tail = t; + backoff.spin(); + } + } + } else if stamp.wrapping_add(self.one_lap) == tail + 1 { + atomic::fence(Ordering::SeqCst); + value = f(value, tail, new_tail, slot)?; + backoff.spin(); + tail = self.tail.load(Ordering::Relaxed); + } else { + // Snooze because we need to wait for the stamp to get updated. + backoff.snooze(); + tail = self.tail.load(Ordering::Relaxed); + } + } + } + + /// Pushes an element into the queue, replacing the oldest element if necessary. + /// + /// If the queue is full, the oldest element is replaced and returned, + /// otherwise `None` is returned. + /// + /// # Examples + /// + /// ``` + /// use heapless::mpmc::Queue; + /// const N: usize = 2; + /// + /// let q = Queue::new::(); + /// + /// assert_eq!(q.force_enqueue(10), None); + /// assert_eq!(q.force_enqueue(20), None); + /// assert_eq!(q.force_enqueue(30), Some(10)); + /// assert_eq!(q.dequeue(), Some(20)); + /// ``` + pub fn force_enqueue(&self, value: T) -> Option { + self.push_or_else(value, |v, tail, new_tail, slot| { + let head = (tail as UintSize).wrapping_sub(self.one_lap); + let new_head = (new_tail as UintSize).wrapping_sub(self.one_lap); + + // Try moving the head. + if self + .head + .compare_exchange_weak(head, new_head, Ordering::SeqCst, Ordering::Relaxed) + .is_ok() + { + // Move the tail. + self.tail.store(new_tail, Ordering::SeqCst); + + // Swap the previous value. + let old = unsafe { slot.value.get().replace(MaybeUninit::new(v)).assume_init() }; + + // Update the stamp. + slot.stamp.store(tail + 1, Ordering::Release); + + Err(old) + } else { + Ok(v) + } + }) + .err() + } + + /// Attempts to pop an element from the queue. + /// + /// If the queue is empty, `None` is returned. + /// + /// # Examples + /// + /// ``` + /// use heapless::mpmc::Queue; + /// const N: usize = 1; + /// + /// let q = Queue::new::(); + /// assert_eq!(q.enqueue(10), Ok(())); + /// + /// assert_eq!(q.dequeue(), Some(10)); + /// assert!(q.dequeue().is_none()); + /// ``` + pub fn dequeue(&self) -> Option { + let backoff = Backoff::new(); + let mut head = self.head.load(Ordering::Relaxed); + + loop { + // Deconstruct the head. + let lap_mask = self.one_lap.wrapping_sub(1); + let index = usize::from(head & lap_mask); + let lap = head & !lap_mask; + + // Inspect the corresponding slot. + debug_assert!(index < self.capacity()); + // SAFETY: index is a valid offset, and buffer is valid contiguous memory. 
+ let slot = unsafe { &*self.as_ptr().add(index) }; + let stamp = slot.stamp.load(Ordering::Acquire); + + // If the stamp is ahead of the head by 1, we may attempt to pop. + if head + 1 == stamp { + let new = if index + 1 < self.capacity() { + // Same lap, incremented index. + // Set to `{ lap: lap, index: index + 1 }`. + head + 1 + } else { + // One lap forward, index wraps around to zero. + // Set to `{ lap: lap.wrapping_add(1), index: 0 }`. + lap.wrapping_add(self.one_lap) + }; + + // Try moving the head. + match self.head.compare_exchange_weak( + head, + new, + Ordering::SeqCst, + Ordering::Relaxed, + ) { + Ok(_) => { + // Read the value from the slot and update the stamp. + let msg = unsafe { slot.value.get().read().assume_init() }; + slot.stamp + .store(head.wrapping_add(self.one_lap), Ordering::Release); + return Some(msg); + } + Err(h) => { + head = h; + backoff.spin(); + } + } + } else if stamp == head { + atomic::fence(Ordering::SeqCst); + let tail = self.tail.load(Ordering::Relaxed); + + // If the tail equals the head, that means the channel is empty. + if tail == head { + return None; + } + + backoff.spin(); + head = self.head.load(Ordering::Relaxed); + } else { + // Snooze because we need to wait for the stamp to get updated. + backoff.snooze(); + head = self.head.load(Ordering::Relaxed); + } + } + } + + /// Returns the maximum number of elements the queue can hold. + #[inline] + pub fn capacity(&self) -> usize { + S::len(self.buffer.get()) + } + + /// Get a reference to the `Queue`, erasing the `N` const-generic. + /// + /// + /// ```rust + /// # use heapless::mpmc::{Queue, QueueView}; + /// let queue: Queue = Queue::new(); + /// let view: &QueueView = queue.as_view(); + /// ``` + /// + /// It is often preferable to do the same through type coerction, since `Queue` implements `Unsize>`: + /// + /// ```rust + /// # use heapless::mpmc::{Queue, QueueView}; + /// let queue: Queue = Queue::new(); + /// let view: &QueueView = &queue; + /// ``` + #[inline] + pub fn as_view(&self) -> &QueueView { + S::as_mpmc_view(self) + } + + /// Get a mutable reference to the `Queue`, erasing the `N` const-generic. + /// + /// ```rust + /// # use heapless::mpmc::{Queue, QueueView}; + /// let mut queue: Queue = Queue::new(); + /// let view: &mut QueueView = queue.as_mut_view(); + /// ``` + /// + /// It is often preferable to do the same through type coerction, since `Queue` implements `Unsize>`: + /// + /// ```rust + /// # use heapless::mpmc::{Queue, QueueView}; + /// let mut queue: Queue = Queue::new(); + /// let view: &mut QueueView = &mut queue; + /// ``` + #[inline] + pub fn as_mut_view(&mut self) -> &mut QueueView { + S::as_mpmc_mut_view(self) + } +} + +impl Drop for QueueInner { + fn drop(&mut self) { + while self.dequeue().is_some() {} + } +} + +unsafe impl Sync for Queue {} +unsafe impl Send for Queue {} + +impl UnwindSafe for Queue {} +impl RefUnwindSafe for Queue {} + +impl Queue { + const _MIN_SIZE: () = assert!(N > 1, "capacity must be at least two"); + const _IS_POW2: () = assert!(N.is_power_of_two(), "capacity must be power of two"); + const _CAP_MAX: () = assert!(N < UintSize::MAX as usize, "capacity maximum exceeded"); + + /// Creates a new bounded queue with the given capacity. + /// + /// # Panics + /// + /// Panics if the capacity is zero. 
+ /// + /// # Examples + /// + /// ``` + /// use heapless::mpmc::Queue; + /// const N: usize = 100; + /// + /// let q = Queue::::new(); + /// ``` + #[cfg(not(loom))] + pub const fn new() -> Self { + // Head is initialized to `{ lap: 0, index: 0 }`. + // Tail is initialized to `{ lap: 0, index: 0 }`. + let head = 0; + let tail = 0; + + // Allocate a buffer of `cap` slots initialized + // with stamps. + let mut slot_count = 0usize; + let mut buffer: [Slot; N] = [const { Slot::::new() }; N]; + while slot_count < N { + // Set the stamp to `{ lap: 0, index: i }`. + buffer[slot_count] = Slot::create_uninit(slot_count as UintSize); + slot_count += 1; + } + + // One lap is the smallest power of two greater than `cap`. + let one_lap = (N + 1).next_power_of_two() as UintSize; + + Self { + buffer: UnsafeCell::new(buffer), + one_lap, + head: CachePadded::new(AtomicTargetSize::new(head)), + tail: CachePadded::new(AtomicTargetSize::new(tail)), + } + } + + /// Creates a new [Queue]. + #[cfg(loom)] + pub fn new() -> Self { + // Head is initialized to `{ lap: 0, index: 0 }`. + // Tail is initialized to `{ lap: 0, index: 0 }`. + let head = 0; + let tail = 0; + + // Allocate a buffer of `cap` slots initialized + // with stamps. + let mut buffer: [Slot; N] = + core::array::from_fn(|slot_count| Slot::::create_uninit(slot_count as UintSize)); + + // One lap is the smallest power of two greater than `cap`. + let one_lap = (N + 1).next_power_of_two() as UintSize; + + Self { + buffer: UnsafeCell::new(buffer), + one_lap, + head: CachePadded::new(AtomicTargetSize::new(head)), + tail: CachePadded::new(AtomicTargetSize::new(tail)), + } + } + + /// Returns `true` if the queue is empty. + /// + /// # Examples + /// + /// ``` + /// use heapless::mpmc::Queue; + /// const N: usize = 100; + /// + /// let q = Queue::new::(); + /// + /// assert!(q.is_empty()); + /// q.push(1).unwrap(); + /// assert!(!q.is_empty()); + /// ``` + pub fn is_empty(&self) -> bool { + let head = self.head.load(Ordering::SeqCst); + let tail = self.tail.load(Ordering::SeqCst); + + // Is the tail lagging one lap behind head? + // Is the tail equal to the head? + // + // Note: If the head changes just before we load the tail, that means there was a moment + // when the channel was not empty, so it is safe to just return `false`. + tail == head + } + + /// Returns `true` if the queue is full. + /// + /// # Examples + /// + /// ``` + /// use heapless::mpmc::Queue; + /// const N: usize = 1; + /// + /// let q = Queue::new::(); + /// + /// assert!(!q.is_full()); + /// q.enqueue(1).unwrap(); + /// assert!(q.is_full()); + /// ``` + pub fn is_full(&self) -> bool { + let tail = self.tail.load(Ordering::SeqCst); + let head = self.head.load(Ordering::SeqCst); + + // Is the head lagging one lap behind tail? + // + // Note: If the tail changes just before we load the head, that means there was a moment + // when the queue was not full, so it is safe to just return `false`. + head.wrapping_add(self.one_lap) == tail + } + + /// Used in `Storage` implementation. + pub(crate) fn as_view_private(&self) -> &QueueView { + self + } + /// Used in `Storage` implementation. + pub(crate) fn as_view_mut_private(&mut self) -> &mut QueueView { + self + } +} + +impl fmt::Debug for Queue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.pad("Queue { .. 
}") + } +} + +impl IntoIterator for Queue { + type Item = T; + + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + IntoIter { value: self } + } +} + +/// Represents the iterator container for implementing the [`Iterator`] trait for [Queue]. +#[derive(Debug)] +pub struct IntoIter { + value: Queue, +} + +impl Iterator for IntoIter { + type Item = T; + + fn next(&mut self) -> Option { + let value = &mut self.value; + let head = value.head.load(Ordering::Relaxed); + if value.head.load(Ordering::Relaxed) != value.tail.load(Ordering::Relaxed) { + let index = usize::from(head & (value.one_lap - 1)); + let lap = head & !(value.one_lap - 1); + // SAFETY: We have mutable access to this, so we can read without + // worrying about concurrency. Furthermore, we know this is + // initialized because it is the value pointed at by `value.head` + // and this is a non-empty queue. + let val = unsafe { + debug_assert!(index < N); + let slot = (&mut *value.buffer.get_mut()).get_unchecked_mut(index); + slot.value.get().read().assume_init() + }; + let new = if index + 1 < value.capacity() { + // Same lap, incremented index. + // Set to `{ lap: lap, index: index + 1 }`. + head + 1 + } else { + // One lap forward, index wraps around to zero. + // Set to `{ lap: lap.wrapping_add(1), index: 0 }`. + lap.wrapping_add(value.one_lap) + }; + value.head.store(new, Ordering::Release); + Some(val) + } else { + None + } + } +} diff --git a/src/mpmc/nblfq.rs b/src/mpmc/nblfq.rs new file mode 100644 index 0000000000..2206317317 --- /dev/null +++ b/src/mpmc/nblfq.rs @@ -0,0 +1,549 @@ +use core::cell::UnsafeCell; +use core::mem::MaybeUninit; + +use crate::storage::{OwnedStorage, Storage}; + +use super::atomic::{AtomicPtr, Ordering}; +use super::{AtomicTargetSize, QueueView, UintSize}; + +/// Represents the byte-size of the sequence word. +const W: usize = usize::MAX; + +/// Represents the inner [Queue] entry slot. +/// +/// Provides safe wrappers around the dangerous `unsafe` bits. +#[derive(Debug)] +pub struct Slot { + sequence: AtomicTargetSize, + ptr: AtomicPtr, + elem: UnsafeCell>>, +} + +impl Slot { + /// Creates a new [Slot]. + #[cfg(not(loom))] + pub const fn new() -> Self { + Self { + sequence: AtomicTargetSize::new(0), + ptr: AtomicPtr::new(core::ptr::null_mut()), + elem: UnsafeCell::new(None), + } + } + + /// Creates a new [Slot]. + #[cfg(loom)] + pub fn new() -> Self { + Self { + sequence: AtomicTargetSize::new(0), + ptr: AtomicPtr::new(core::ptr::null_mut()), + elem: UnsafeCell::new(None), + } + } + + /// Creates a new [Slot]. + #[cfg(not(loom))] + pub const fn new_sequence(sequence: UintSize) -> Self { + Self { + sequence: AtomicTargetSize::new(sequence), + ptr: AtomicPtr::new(core::ptr::null_mut()), + elem: UnsafeCell::new(None), + } + } + + /// Creates a new [Slot]. + #[cfg(loom)] + pub fn new_sequence(sequence: UintSize) -> Self { + Self { + sequence: AtomicTargetSize::new(sequence), + ptr: AtomicPtr::new(core::ptr::null_mut()), + elem: UnsafeCell::new(None), + } + } + + /// Creates a new [Slot] from the provided parameters. + pub fn create(sequence: UintSize, elem: T) -> Self { + let s = Self { + sequence: AtomicTargetSize::new(sequence), + ptr: AtomicPtr::new(core::ptr::null_mut()), + elem: UnsafeCell::new(Some(MaybeUninit::new(elem))), + }; + + // SAFETY: we just initialized the element, the pointer references valid memory. + s.ptr.store(unsafe { s.elem_mut_ptr() }, Ordering::Release); + + s + } + + /// Gets a reference to the sequence. 
+ pub const fn sequence(&self) -> &AtomicTargetSize { + &self.sequence + } + + /// Gets the sequence. + pub fn load_sequence(&self) -> UintSize { + self.sequence.load(Ordering::Acquire) + } + + /// Sets the sequence. + pub fn store_sequence(&self, seq: UintSize) { + self.sequence.store(seq, Ordering::Release); + } + + /// Loads the [Slot] element. + /// + /// Returns: + /// + /// - `Some(T)`: if the [Slot] has been initialized + /// - `None`: if the [Slot] is empty. + pub fn load_element(&self) -> Option<&T> { + let ptr = self.ptr.load(Ordering::Acquire); + if ptr.is_null() { + None + } else { + // SAFETY: if non-null, `ptr` references an owned element. + unsafe { Some(&*ptr) } + } + } + + /// Stores the [Slot] sequence and element. + pub fn store_element(&self, sequence: UintSize, elem: T) { + self.sequence.store(sequence, Ordering::Release); + // SAFETY: we run a slight risk of overwriting an entry at the same position. + // However, we are not in danger of undefined behavior. + unsafe { (&mut *self.elem.get()).replace(MaybeUninit::new(elem)) }; + // SAFETY: we just iniitialized the element, so the pointer references valid memory. + self.ptr + .store(unsafe { self.elem_mut_ptr() }, Ordering::Release); + } + + /// Gets a mutable pointer to the element. + /// + /// # Safety + /// + /// Caller must ensure exclusive access to the element. + #[inline] + unsafe fn elem_mut_ptr(&self) -> *mut T { + unsafe { (&mut *self.elem.get()).as_mut().unwrap().as_mut_ptr() } + } + + /// Clears the [Slot]. + /// + /// Calls the destructor of the stored element, if it exists. + pub fn clear(&self) -> Option { + self.sequence.store(0, Ordering::Release); + + if self.ptr.load(Ordering::Acquire).is_null() { + None + } else { + self.ptr.store(core::ptr::null_mut(), Ordering::Release); + // SAFETY: we only call `assume_init` on a populated element. + unsafe { (&mut *self.elem.get()).take().map(|t| t.assume_init()) } + } + } + + /// Performs a compare-and-exchange (CAS) operation on the [Slot]. + /// + /// Returns: + /// + /// - `Ok(Slot)`: returns the previous held entry on success + /// - `Err(Slot)`: returns the replacement entry on error. + pub fn compare_exchange(&self, cmp: &Self, rep: Self) -> Result { + let Ok(s) = self.sequence.compare_exchange( + cmp.load_sequence(), + rep.load_sequence(), + Ordering::AcqRel, + Ordering::Acquire, + ) else { + return Err(rep); + }; + + let Ok(p) = self.ptr.compare_exchange( + cmp.ptr.load(Ordering::Acquire), + rep.ptr.load(Ordering::Acquire), + Ordering::AcqRel, + Ordering::Acquire, + ) else { + self.sequence.store(s, Ordering::Release); + return Err(rep); + }; + + let t = if p.is_null() { + None + } else { + unsafe { (&mut *self.elem.get()).take() } + }; + + if !rep.ptr.load(Ordering::Acquire).is_null() { + // SAFETY: we only call `assume_init` on a populated element. + unsafe { + if let Some(elem) = rep.elem.into_inner() { + (&mut *self.elem.get()).replace(MaybeUninit::new(elem.assume_init())); + } + }; + } + + if let Some(elem) = t { + // SAFETY: we only call `assume_init` on a populated element. + Ok(Self::create(s, unsafe { elem.assume_init() })) + } else { + Ok(Self::new_sequence(s)) + } + } + + /// Gets whether the [Slot] is empty. + pub fn is_empty(&self) -> bool { + self.ptr.load(Ordering::Relaxed).is_null() && unsafe { (&*self.elem.get()).is_none() } + } + + /// Compares if [Slot] at position `i` is before [Slot] at position `j`. 
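+    ///
+    /// When both slots carry the same sequence number, the positional order
+    /// `i < j` decides; otherwise the sequence numbers are compared with
+    /// wrap-around arithmetic.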
+ pub fn comp(&self, i: UintSize, oth: &Self, j: UintSize) -> bool { + let l_seq = self.load_sequence(); + let r_seq = oth.load_sequence(); + + if l_seq == r_seq { + i < j + } else { + r_seq.wrapping_add(W as UintSize).wrapping_sub(l_seq) < (1 << (UintSize::BITS - 1)) + } + } + + /// Destructs the [Slot] into its inner element. + /// + /// Returns: + /// + /// - `Some(T)`: if the [Slot] was initialized. + /// - `None`: if the [Slot] was uninitialized. + pub fn into_inner(self) -> Option { + let ptr = self.ptr.load(Ordering::Acquire); + if ptr.is_null() { + None + } else { + // SAFETY: if non-null, `ptr` references an owned element. + unsafe { self.elem.into_inner().map(|e| e.assume_init()) } + } + } +} + +impl Default for Slot { + fn default() -> Self { + Self::new() + } +} + +impl PartialEq for Slot { + fn eq(&self, oth: &Self) -> bool { + self.load_sequence() == oth.load_sequence() && self.load_element() == oth.load_element() + } +} + +/// Implement the `Sync` marker trait to share `Slot` across threads. +/// +/// # Safety +/// +/// All mutability is handled through lock-free guards implemented with atomic operations. +unsafe impl Sync for Slot where T: Sync {} +/// Implement the `Send` marker trait to share `Slot` across threads. +/// +/// # Safety +/// +/// All mutability is handled through lock-free guards implemented with atomic operations. +unsafe impl Send for Slot where T: Send {} + +/// Base struct for [`Queue`] and [`QueueView`], generic over the [`Storage`]. +/// +/// In most cases you should use [`Queue`] or [`QueueView`] directly. Only use this +/// struct if you want to write code that's generic over both. +pub struct QueueInner { + head: AtomicTargetSize, + tail: AtomicTargetSize, + buffer: UnsafeCell>>, +} + +impl QueueInner { + fn buffer(&self) -> &[Slot] { + // SAFETY: buffer is initialized properly, and the pointer references valid memory. + unsafe { core::slice::from_raw_parts(S::as_ptr(self.buffer.get()), self.capacity()) } + } + + /// Returns the maximum number of elements the queue can hold. + #[inline] + pub fn capacity(&self) -> usize { + S::len(self.buffer.get()) + } + + /// Gets the [Self] length. + pub fn len(&self) -> usize { + let mut len = 0; + while len < self.capacity() && !self.buffer()[len].is_empty() { + len = len.saturating_add(1); + } + len + } + + /// Gets whether the [Self] is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Get a reference to the `Queue`, erasing the `N` const-generic. + /// + /// + /// ```rust + /// # use heapless::mpmc::{Queue, QueueView}; + /// let queue: Queue = Queue::new(); + /// let view: &QueueView = queue.as_view(); + /// ``` + /// + /// It is often preferable to do the same through type coerction, since `Queue` implements `Unsize>`: + /// + /// ```rust + /// # use heapless::mpmc::{Queue, QueueView}; + /// let queue: Queue = Queue::new(); + /// let view: &QueueView = &queue; + /// ``` + #[inline] + pub fn as_view(&self) -> &QueueView { + S::as_mpmc_view(self) + } + + /// Get a mutable reference to the `Queue`, erasing the `N` const-generic. 
+ /// + /// ```rust + /// # use heapless::mpmc::{Queue, QueueView}; + /// let mut queue: Queue = Queue::new(); + /// let view: &mut QueueView = queue.as_mut_view(); + /// ``` + /// + /// It is often preferable to do the same through type coerction, since `Queue` implements `Unsize>`: + /// + /// ```rust + /// # use heapless::mpmc::{Queue, QueueView}; + /// let mut queue: Queue = Queue::new(); + /// let view: &mut QueueView = &mut queue; + /// ``` + #[inline] + pub fn as_mut_view(&mut self) -> &mut QueueView { + S::as_mpmc_mut_view(self) + } + + /// Adds an `item` to the end of the queue. + /// + /// Returns back the `item` if the queue is full. + pub fn enqueue(&self, item: T) -> Result<(), T> { + let mut patience: usize = self.capacity(); + + let mut res = Ok(()); + let mut new_slot = Slot::create(0, item); + + 'retry: while { + let c = patience > 0; + patience = patience.saturating_sub(1); + c + } { + let mut pos = self.load_head(); + let mut prev_pos = self.prev(pos); + let mut slot; + let mut prev_slot = &self.buffer()[prev_pos as usize]; + + let mut chase_patience = self.capacity(); + 'chase: while { + let c = chase_patience > 0; + chase_patience = chase_patience.saturating_sub(1); + c + } { + slot = &self.buffer()[pos as usize]; + prev_pos = self.prev(pos); + prev_slot = &self.buffer()[prev_pos as usize]; + + if prev_slot.load_element().is_some() && slot.load_element().is_none() { + /* null cell, non-empty predecessor */ + break 'chase; + } + + if !prev_slot.comp(prev_pos, slot, pos) { + /* found step */ + if prev_slot.load_element().is_none() && slot.load_element().is_none() { + /* empty list */ + break 'chase; + } + + if prev_slot.load_element().is_some() && slot.load_element().is_some() { + /* full list */ + res = Err(()); + break 'retry; + } + } + + pos = pos.wrapping_add(1) % self.capacity() as UintSize; + } + + let mut seq = prev_slot.load_sequence(); + + if prev_slot.load_element().is_none() { + seq = seq.wrapping_add(W as UintSize); + } + + if pos == 0 { + seq = seq.wrapping_add(1); + } + + new_slot.store_sequence(seq); + + match self.buffer()[pos as usize].compare_exchange(&Slot::new_sequence(seq), new_slot) { + Ok(_) => { + self.store_head((pos + 1) % self.capacity() as UintSize); + return Ok(()); + } + Err(v) => new_slot = v, + } + } + + // SAFETY: we only call `unwrap` and `assume_init` on populated element. + res.map_err(|_| new_slot.into_inner().unwrap()) + } + + /// Returns the item in the front of the queue, or `None` if the queue is empty. + pub fn dequeue(&self) -> Option { + let mut patience = self.capacity(); + + /* retry loop */ + while { + let c = patience > 0; + patience = patience.saturating_sub(1); + c + } { + let mut tail = self.load_tail(); + let prev_tail = self.prev(tail); + + let mut prev_slot = &self.buffer()[prev_tail as usize]; + let mut slot = &self.buffer()[tail as usize]; + + /* chase the tail */ + while prev_slot.comp(prev_tail, slot, tail) { + tail = tail.wrapping_add(1) % self.capacity() as UintSize; + prev_slot = slot; + slot = &self.buffer()[tail as usize]; + } + + if prev_slot.load_element().is_none() && slot.load_element().is_none() { + /* empty queue */ + return None; + } + + let seq = slot.load_sequence().wrapping_add(1); + + if let Ok(slot) = + self.buffer()[tail as usize].compare_exchange(slot, Slot::new_sequence(seq)) + { + self.store_tail(tail.wrapping_add(1) % self.capacity() as UintSize); + return slot.clear(); + } + } + + None + } + + /// Helper function to get the previous queue position. 
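+    ///
+    /// Wraps around to `capacity() - 1` when `i` is `0`.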
+ pub fn prev(&self, i: UintSize) -> UintSize { + let n = self.capacity() as UintSize; + (i + n - 1) % n + } + + /// Gets a reference to the head position. + pub const fn head(&self) -> &AtomicTargetSize { + &self.head + } + + /// Gets the head position. + pub fn load_head(&self) -> UintSize { + self.head.load(Ordering::Acquire) + } + + /// Sets the head position. + pub fn store_head(&self, head: UintSize) { + self.head.store(head, Ordering::Release); + } + + /// Gets a reference to the tail position. + pub const fn tail(&self) -> &AtomicTargetSize { + &self.tail + } + + /// Gets the tail position. + pub fn load_tail(&self) -> UintSize { + self.tail.load(Ordering::Acquire) + } + + /// Sets the tail position. + pub fn store_tail(&self, tail: UintSize) { + self.tail.store(tail, Ordering::Release); + } +} + +/// Implement the `Sync` marker trait to share `Slot` across threads. +/// +/// # Safety +/// +/// All mutability is handled through lock-free guards implemented with atomic operations. +unsafe impl Sync for QueueInner where T: Sync {} +/// Implement the `Send` marker trait to share `Slot` across threads. +/// +/// # Safety +/// +/// All mutability is handled through lock-free guards implemented with atomic operations. +unsafe impl Send for QueueInner where T: Send {} + +/// A statically allocated multi-producer, multi-consumer queue with a capacity of `N` elements. +/// +///
+/// +/// `N` must be a power of 2. +/// +///
+/// +/// The maximum value of `N` is 128 if the `mpmc_large` feature is not enabled. +pub type Queue = QueueInner>; + +impl Queue { + /// Creates a new [Queue]. + #[cfg(not(loom))] + pub const fn new() -> Self { + Self { + head: AtomicTargetSize::new(0), + tail: AtomicTargetSize::new(0), + buffer: UnsafeCell::new([const { Slot::new() }; N]), + } + } + + /// Creates a new [Queue]. + #[cfg(loom)] + pub fn new() -> Self { + Self { + head: AtomicTargetSize::new(0), + tail: AtomicTargetSize::new(0), + buffer: UnsafeCell::new(core::array::from_fn(|_| Slot::new())), + } + } + + /// Used in `Storage` implementation. + pub(crate) fn as_view_private(&self) -> &QueueView { + self + } + /// Used in `Storage` implementation. + pub(crate) fn as_view_mut_private(&mut self) -> &mut QueueView { + self + } +} + +impl Default for Queue { + fn default() -> Self { + Self::new() + } +} + +impl Drop for QueueInner { + fn drop(&mut self) { + // Drop all elements currently in the queue. + while self.dequeue().is_some() {} + } +} diff --git a/src/mpmc/original.rs b/src/mpmc/original.rs new file mode 100644 index 0000000000..bc2ac38607 --- /dev/null +++ b/src/mpmc/original.rs @@ -0,0 +1,311 @@ +use core::{cell::UnsafeCell, mem::MaybeUninit}; + +use crate::storage::{OwnedStorage, Storage}; + +use super::{atomic, AtomicTargetSize, IntSize, QueueView, UintSize}; +use atomic::Ordering; + +/// Base struct for [`Queue`] and [`QueueView`], generic over the [`Storage`]. +/// +/// In most cases you should use [`Queue`] or [`QueueView`] directly. Only use this +/// struct if you want to write code that's generic over both. +pub struct QueueInner { + dequeue_pos: AtomicTargetSize, + enqueue_pos: AtomicTargetSize, + buffer: UnsafeCell>>, +} + +/// A statically allocated multi-producer, multi-consumer queue with a capacity of `N` elements. +/// +///
+/// +/// `N` must be a power of 2. +/// +///
+/// +/// The maximum value of `N` is 128 if the `mpmc_large` feature is not enabled. +/// +///
+///
+/// This implementation is not fully lock-free. If a thread or task gets preempted during
+/// a `dequeue` or `enqueue` operation, it may prevent other operations from succeeding
+/// until it is scheduled again and finishes that operation.
+///
+/// See issue #583 for more details.
+///
+///
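A minimal usage sketch (not taken from the patch; it only exercises the behaviour documented above, namely that a full queue hands the rejected item back instead of blocking):

```rust
use heapless::mpmc::Queue;

fn main() {
    // Capacity of 2 elements; `N` must be a power of two greater than 1.
    let q: Queue<u8, 2> = Queue::new();

    assert_eq!(q.enqueue(1), Ok(()));
    assert_eq!(q.enqueue(2), Ok(()));
    // Full: the rejected item is handed back instead of blocking.
    assert_eq!(q.enqueue(3), Err(3));

    assert_eq!(q.dequeue(), Some(1));
    assert_eq!(q.dequeue(), Some(2));
    assert_eq!(q.dequeue(), None);
}
```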
+pub type Queue = QueueInner>; + +impl Queue { + #[cfg(not(loom))] + /// Creates an empty queue. + pub const fn new() -> Self { + const { + assert!(N > 1); + assert!(N.is_power_of_two()); + assert!(N < UintSize::MAX as usize); + } + + let mut cell_count = 0; + + let mut result_cells: [Cell; N] = [const { Cell::new(0) }; N]; + while cell_count != N { + result_cells[cell_count] = Cell::new(cell_count); + cell_count += 1; + } + + Self { + buffer: UnsafeCell::new(result_cells), + dequeue_pos: AtomicTargetSize::new(0), + enqueue_pos: AtomicTargetSize::new(0), + } + } + + /// Creates an empty queue. + #[cfg(loom)] + pub fn new() -> Self { + use core::array; + + const { + assert!(N > 1); + assert!(N.is_power_of_two()); + assert!(N < UintSize::MAX as usize); + } + + let result_cells: [Cell; N] = array::from_fn(|idx| Cell::new(idx)); + + Self { + buffer: UnsafeCell::new(result_cells), + dequeue_pos: AtomicTargetSize::new(0), + enqueue_pos: AtomicTargetSize::new(0), + } + } + + /// Used in `Storage` implementation. + pub(crate) fn as_view_private(&self) -> &QueueView { + self + } + /// Used in `Storage` implementation. + pub(crate) fn as_view_mut_private(&mut self) -> &mut QueueView { + self + } +} + +impl QueueInner { + fn buffer(&self) -> &[Cell] { + // SAFETY: buffer is initialized properly, and the pointer references valid memory. + unsafe { core::slice::from_raw_parts(S::as_ptr(self.buffer.get()), self.capacity()) } + } + + /// Returns the maximum number of elements the queue can hold. + #[inline] + pub fn capacity(&self) -> usize { + S::len(self.buffer.get()) + } + + /// Gets the [Self] length. + pub fn len(&self) -> usize { + let mut len = 0; + while len < self.capacity() && !self.buffer()[len].is_empty() { + len = len.saturating_add(1); + } + len + } + + /// Gets whether the [Self] is empty. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Get a reference to the `Queue`, erasing the `N` const-generic. + /// + /// + /// ```rust + /// # use heapless::mpmc::{Queue, QueueView}; + /// let queue: Queue = Queue::new(); + /// let view: &QueueView = queue.as_view(); + /// ``` + /// + /// It is often preferable to do the same through type coerction, since `Queue` implements `Unsize>`: + /// + /// ```rust + /// # use heapless::mpmc::{Queue, QueueView}; + /// let queue: Queue = Queue::new(); + /// let view: &QueueView = &queue; + /// ``` + #[inline] + pub fn as_view(&self) -> &QueueView { + S::as_mpmc_view(self) + } + + /// Get a mutable reference to the `Queue`, erasing the `N` const-generic. + /// + /// ```rust + /// # use heapless::mpmc::{Queue, QueueView}; + /// let mut queue: Queue = Queue::new(); + /// let view: &mut QueueView = queue.as_mut_view(); + /// ``` + /// + /// It is often preferable to do the same through type coerction, since `Queue` implements `Unsize>`: + /// + /// ```rust + /// # use heapless::mpmc::{Queue, QueueView}; + /// let mut queue: Queue = Queue::new(); + /// let view: &mut QueueView = &mut queue; + /// ``` + #[inline] + pub fn as_mut_view(&mut self) -> &mut QueueView { + S::as_mpmc_mut_view(self) + } + + fn mask(&self) -> UintSize { + (S::len(self.buffer.get()) - 1) as _ + } + + /// Returns the item in the front of the queue, or `None` if the queue is empty. + pub fn dequeue(&self) -> Option { + unsafe { dequeue(S::as_ptr(self.buffer.get()), &self.dequeue_pos, self.mask()) } + } + + /// Adds an `item` to the end of the queue. + /// + /// Returns back the `item` if the queue is full. 
+ pub fn enqueue(&self, item: T) -> Result<(), T> { + unsafe { + enqueue( + S::as_ptr(self.buffer.get()), + &self.enqueue_pos, + self.mask(), + item, + ) + } + } +} + +impl Default for Queue { + fn default() -> Self { + Self::new() + } +} + +impl Drop for QueueInner { + fn drop(&mut self) { + // Drop all elements currently in the queue. + while self.dequeue().is_some() {} + } +} + +unsafe impl Sync for QueueInner where T: Send {} + +struct Cell { + data: MaybeUninit, + sequence: AtomicTargetSize, +} + +impl Cell { + #[cfg(not(loom))] + const fn new(seq: usize) -> Self { + Self { + data: MaybeUninit::uninit(), + sequence: AtomicTargetSize::new(seq as UintSize), + } + } + #[cfg(loom)] + fn new(seq: usize) -> Self { + Self { + data: MaybeUninit::uninit(), + sequence: AtomicTargetSize::new(seq as UintSize), + } + } + + pub(crate) fn is_empty(&self) -> bool { + self.sequence.load(Ordering::Relaxed) != 0 + } +} + +unsafe fn dequeue( + buffer: *mut Cell, + dequeue_pos: &AtomicTargetSize, + mask: UintSize, +) -> Option { + let mut pos = dequeue_pos.load(Ordering::Relaxed); + + let mut cell; + loop { + cell = buffer.add(usize::from(pos & mask)); + let seq = (*cell).sequence.load(Ordering::Acquire); + let dif = (seq as IntSize).wrapping_sub((pos.wrapping_add(1)) as IntSize); + + match dif.cmp(&0) { + core::cmp::Ordering::Equal => { + if dequeue_pos + .compare_exchange_weak( + pos, + pos.wrapping_add(1), + Ordering::Relaxed, + Ordering::Relaxed, + ) + .is_ok() + { + break; + } + } + core::cmp::Ordering::Less => { + return None; + } + core::cmp::Ordering::Greater => { + pos = dequeue_pos.load(Ordering::Relaxed); + } + } + } + + let data = (*cell).data.as_ptr().read(); + (*cell) + .sequence + .store(pos.wrapping_add(mask).wrapping_add(1), Ordering::Release); + Some(data) +} + +unsafe fn enqueue( + buffer: *mut Cell, + enqueue_pos: &AtomicTargetSize, + mask: UintSize, + item: T, +) -> Result<(), T> { + let mut pos = enqueue_pos.load(Ordering::Relaxed); + + let mut cell; + loop { + cell = buffer.add(usize::from(pos & mask)); + let seq = (*cell).sequence.load(Ordering::Acquire); + let dif = (seq as IntSize).wrapping_sub(pos as IntSize); + + match dif.cmp(&0) { + core::cmp::Ordering::Equal => { + if enqueue_pos + .compare_exchange_weak( + pos, + pos.wrapping_add(1), + Ordering::Relaxed, + Ordering::Relaxed, + ) + .is_ok() + { + break; + } + } + core::cmp::Ordering::Less => { + return Err(item); + } + core::cmp::Ordering::Greater => { + pos = enqueue_pos.load(Ordering::Relaxed); + } + } + } + + (*cell).data.as_mut_ptr().write(item); + (*cell) + .sequence + .store(pos.wrapping_add(1), Ordering::Release); + Ok(()) +} diff --git a/tests/tsan.rs b/tests/tsan.rs index 14391e2435..00bf9d3361 100644 --- a/tests/tsan.rs +++ b/tests/tsan.rs @@ -1,5 +1,6 @@ #![deny(rust_2018_compatibility)] #![deny(rust_2018_idioms)] +#![cfg(not(loom))] use std::{ptr::addr_of_mut, thread};