From fd75de5a4a5bcb87b0ea8b7484350cea964fd5f2 Mon Sep 17 00:00:00 2001 From: Benjamin Woodruff Date: Thu, 6 Nov 2025 16:50:08 -0800 Subject: [PATCH 1/2] Turbopack: Add crate with bincode serialization helpers --- Cargo.lock | 56 +++- Cargo.toml | 3 + turbopack/crates/turbo-bincode/Cargo.toml | 19 ++ turbopack/crates/turbo-bincode/src/lib.rs | 383 ++++++++++++++++++++++ 4 files changed, 454 insertions(+), 7 deletions(-) create mode 100644 turbopack/crates/turbo-bincode/Cargo.toml create mode 100644 turbopack/crates/turbo-bincode/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 776aa300ab6869..2245c9c1d20aee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -572,6 +572,24 @@ dependencies = [ "serde", ] +[[package]] +name = "bincode" +version = "2.0.1" +source = "git+https://github.com/bgw/bincode.git?branch=bgw%2Fpatches#19f09c5f6895d769883c10b3d374f761ab7fe83d" +dependencies = [ + "bincode_derive", + "serde", + "unty", +] + +[[package]] +name = "bincode_derive" +version = "2.0.1" +source = "git+https://github.com/bgw/bincode.git?branch=bgw%2Fpatches#19f09c5f6895d769883c10b3d374f761ab7fe83d" +dependencies = [ + "virtue", +] + [[package]] name = "bindgen" version = "0.70.1" @@ -1178,7 +1196,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35584c5fcba8059780748866387fb97c5a203bcfc563fc3d0790af406727a117" dependencies = [ "anyhow", - "bincode", + "bincode 1.3.3", "colored", "glob", "libc", @@ -6749,9 +6767,9 @@ checksum = "03b634d87b960ab1a38c4fe143b508576f075e7c978bfad18217645ebfdfa2ec" [[package]] name = "smallvec" -version = "1.13.1" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" dependencies = [ "serde", ] @@ -9078,6 +9096,18 @@ dependencies = [ "utf-8", ] +[[package]] +name = "turbo-bincode" +version = "0.0.0" +dependencies = [ + "bincode 2.0.1", + "either", + "indexmap 2.9.0", + "mime", + "serde", + "serde_json", +] + [[package]] name = "turbo-dyn-eq-hash" version = "0.0.1" @@ -9150,7 +9180,7 @@ dependencies = [ name = "turbo-static" version = "0.1.0" dependencies = [ - "bincode", + "bincode 1.3.3", "clap", "ctrlc", "ignore", @@ -10230,6 +10260,12 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "unty" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" + [[package]] name = "url" version = "2.5.4" @@ -10412,7 +10448,7 @@ dependencies = [ "anyhow", "async-trait", "base64 0.22.1", - "bincode", + "bincode 1.3.3", "bytecheck 0.6.11", "bytes", "derive_more 2.0.1", @@ -10429,6 +10465,12 @@ dependencies = [ "virtual-mio", ] +[[package]] +name = "virtue" +version = "0.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" + [[package]] name = "vlq" version = "0.5.1" @@ -10799,7 +10841,7 @@ dependencies = [ "anyhow", "async-trait", "base64 0.22.1", - "bincode", + "bincode 1.3.3", "bytecheck 0.6.11", "bytes", "derive_more 2.0.1", @@ -10905,7 +10947,7 @@ dependencies = [ "anyhow", "async-trait", "base64 0.22.1", - "bincode", + "bincode 1.3.3", "blake3", "bus", "bytecheck 0.6.11", diff --git a/Cargo.toml b/Cargo.toml index 5a0f23cd74f0b0..b37a233c2f65e3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -293,6 +293,7 @@ next-taskless = { path = "crates/next-taskless" } # Turbopack auto-hash-map = { path = "turbopack/crates/turbo-tasks-auto-hash-map" } +turbo-bincode = { path = "turbopack/crates/turbo-bincode" } turbo-prehash = { path = "turbopack/crates/turbo-prehash" } turbo-rcstr = { path = "turbopack/crates/turbo-rcstr" } turbo-dyn-eq-hash = { path = "turbopack/crates/turbo-dyn-eq-hash" } @@ -359,6 +360,7 @@ preset_env_base = "6.0.0" # General Deps +bincode = { version = "2.0.1", features = ["serde"] } chromiumoxide = { version = "0.5.4", features = [ "tokio-runtime", ], default-features = false } @@ -481,4 +483,5 @@ webbrowser = "1.0.6" inventory = "0.3.21" [patch.crates-io] +bincode = { git = "https://github.com/bgw/bincode.git", branch = "bgw/patches" } mdxjs = { git = "https://github.com/mischnic/mdxjs-rs.git", branch = "swc-core-32" } diff --git a/turbopack/crates/turbo-bincode/Cargo.toml b/turbopack/crates/turbo-bincode/Cargo.toml new file mode 100644 index 00000000000000..cf18c496c513f9 --- /dev/null +++ b/turbopack/crates/turbo-bincode/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "turbo-bincode" +version = "0.0.0" +description = "Utilities for bincode used in turbo-tasks and turbopack" +license = "MIT" +edition = "2024" + +[lib] + +[lints] +workspace = true + +[dependencies] +bincode = { workspace = true } +either = { workspace = true } +indexmap = { workspace = true } +mime = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } diff --git a/turbopack/crates/turbo-bincode/src/lib.rs b/turbopack/crates/turbo-bincode/src/lib.rs new file mode 100644 index 00000000000000..24965e39a25f4d --- /dev/null +++ b/turbopack/crates/turbo-bincode/src/lib.rs @@ -0,0 +1,383 @@ +use bincode::{ + BorrowDecode, Decode, Encode, + de::{BorrowDecoder, Decoder}, + enc::Encoder, + error::{DecodeError, EncodeError}, +}; + +pub mod indexmap { + use std::hash::{BuildHasher, Hash}; + + use ::indexmap::IndexMap; + + use super::*; + + pub fn encode(map: &IndexMap, encoder: &mut E) -> Result<(), EncodeError> + where + E: Encoder, + K: Encode, + V: Encode, + { + usize::encode(&map.len(), encoder)?; + for (k, v) in map { + K::encode(k, encoder)?; + V::encode(v, encoder)?; + } + Ok(()) + } + + pub fn decode(decoder: &mut D) -> Result, DecodeError> + where + D: Decoder, + K: Decode + Eq + Hash, + V: Decode, + S: BuildHasher + Default, + { + let len = usize::decode(decoder)?; + let mut map = IndexMap::with_capacity_and_hasher(len, Default::default()); + for _i in 0..len { + map.insert(K::decode(decoder)?, V::decode(decoder)?); + } + Ok(map) + } + + pub fn borrow_decode<'de, Context, D, K, V, S>( + decoder: &mut D, + ) -> Result, DecodeError> + where + D: BorrowDecoder<'de, Context = Context>, + K: BorrowDecode<'de, Context> + Eq + Hash, + V: BorrowDecode<'de, Context>, + S: BuildHasher + Default, + { + let len = usize::decode(decoder)?; + let mut map = IndexMap::with_capacity_and_hasher(len, Default::default()); + for _i in 0..len { + map.insert(K::borrow_decode(decoder)?, V::borrow_decode(decoder)?); + } + Ok(map) + } + + #[cfg(test)] + mod tests { + use bincode::{decode_from_slice, encode_to_vec}; + + use super::*; + + #[test] + fn test_roundtrip() { + let cfg = bincode::config::standard(); + + #[derive(Encode, Decode)] + struct Wrapper(#[bincode(with = "crate::indexmap")] IndexMap); + + let map1 = Wrapper(IndexMap::from([ + ("key1".to_string(), 12345u32), + ("key2".to_string(), 23456u32), + ])); + + let map2: Wrapper = decode_from_slice(&encode_to_vec(&map1, cfg).unwrap(), cfg) + .unwrap() + .0; + + assert_eq!(map1.0, map2.0); + } + } +} + +pub mod indexset { + use std::hash::{BuildHasher, Hash}; + + use ::indexmap::IndexSet; + + use super::*; + + pub fn encode(set: &IndexSet, encoder: &mut E) -> Result<(), EncodeError> + where + E: Encoder, + T: Encode, + { + usize::encode(&set.len(), encoder)?; + for item in set { + T::encode(item, encoder)?; + } + Ok(()) + } + + pub fn decode(decoder: &mut D) -> Result, DecodeError> + where + D: Decoder, + T: Decode + Eq + Hash, + S: BuildHasher + Default, + { + let len = usize::decode(decoder)?; + let mut set = IndexSet::with_capacity_and_hasher(len, Default::default()); + for _i in 0..len { + set.insert(T::decode(decoder)?); + } + Ok(set) + } + + pub fn borrow_decode<'de, Context, D, T, S>( + decoder: &mut D, + ) -> Result, DecodeError> + where + D: BorrowDecoder<'de, Context = Context>, + T: BorrowDecode<'de, Context> + Eq + Hash, + S: BuildHasher + Default, + { + let len = usize::decode(decoder)?; + let mut set = IndexSet::with_capacity_and_hasher(len, Default::default()); + for _i in 0..len { + set.insert(T::borrow_decode(decoder)?); + } + Ok(set) + } + + #[cfg(test)] + mod tests { + use bincode::{decode_from_slice, encode_to_vec}; + + use super::*; + + #[test] + fn test_roundtrip() { + let cfg = bincode::config::standard(); + + #[derive(Encode, Decode)] + struct Wrapper(#[bincode(with = "crate::indexset")] IndexSet); + + let set1 = Wrapper(IndexSet::from([ + "value1".to_string(), + "value2".to_string(), + "value3".to_string(), + ])); + + let set2: Wrapper = decode_from_slice(&encode_to_vec(&set1, cfg).unwrap(), cfg) + .unwrap() + .0; + + assert_eq!(set1.0, set2.0); + } + } +} + +pub mod mime_option { + use std::str::FromStr; + + use mime::Mime; + + use super::*; + + pub fn encode(mime: &Option, encoder: &mut E) -> Result<(), EncodeError> { + let mime_str: Option<&str> = mime.as_ref().map(AsRef::as_ref); + Encode::encode(&mime_str, encoder) + } + + pub fn decode>( + decoder: &mut D, + ) -> Result, DecodeError> { + if let Some(mime_str) = as Decode>::decode(decoder)? { + Ok(Some( + Mime::from_str(&mime_str).map_err(|e| DecodeError::OtherString(e.to_string()))?, + )) + } else { + Ok(None) + } + } + + pub fn borrow_decode<'de, Context, D: BorrowDecoder<'de, Context = Context>>( + decoder: &mut D, + ) -> Result, DecodeError> { + decode(decoder) + } + + #[cfg(test)] + mod tests { + use bincode::{decode_from_slice, encode_to_vec}; + + use super::*; + + #[derive(Encode, Decode)] + struct Wrapper(#[bincode(with = "crate::mime_option")] Option); + + #[test] + fn test_roundtrip() { + let cfg = bincode::config::standard(); + + let mime1 = Wrapper(Some("text/html; charset=utf-8".parse().unwrap())); + + let mime2: Wrapper = decode_from_slice(&encode_to_vec(&mime1, cfg).unwrap(), cfg) + .unwrap() + .0; + + assert_eq!(mime1.0, mime2.0); + } + + #[test] + fn test_roundtrip_none() { + let cfg = bincode::config::standard(); + + let mime1 = Wrapper(None); + + let mime2: Wrapper = decode_from_slice(&encode_to_vec(&mime1, cfg).unwrap(), cfg) + .unwrap() + .0; + + assert_eq!(mime1.0, mime2.0); + } + } +} + +/// Encode/decode as a serialized string encoded using `serde_json`. +/// +/// This encodes less efficiently than `#[bincode(with_serde)]` would, but avoids [bincode's known +/// compatibility issues][serde-issues]. Use this for infrequently-serialized types and when you're +/// unsure if the underlying type may trigger a serde compatibility issue. +/// +/// In the future this could be replaced with a more efficient serde-compatible self-describing +/// format with a compact binary representation (e.g. pot or MessagePack), but `serde_json` is +/// convenient because it avoids introducing additional dependencies. +/// +/// [serde-issues]: https://docs.rs/bincode/latest/bincode/serde/index.html#known-issues +pub mod serde_json { + use super::*; + + pub fn encode( + value: &T, + encoder: &mut E, + ) -> Result<(), EncodeError> { + let json_str = + ::serde_json::to_string(value).map_err(|e| EncodeError::OtherString(e.to_string()))?; + Encode::encode(&json_str, encoder) + } + + pub fn decode, T: serde::de::DeserializeOwned>( + decoder: &mut D, + ) -> Result { + let json_str: String = Decode::decode(decoder)?; + ::serde_json::from_str(&json_str).map_err(|e| DecodeError::OtherString(e.to_string())) + } + + pub fn borrow_decode< + 'de, + Context, + D: BorrowDecoder<'de, Context = Context>, + T: serde::de::Deserialize<'de>, + >( + decoder: &mut D, + ) -> Result { + let json_str: &str = BorrowDecode::borrow_decode(decoder)?; + ::serde_json::from_str(json_str).map_err(|e| DecodeError::OtherString(e.to_string())) + } + + #[cfg(test)] + mod tests { + use ::serde_json::{Value, json}; + use bincode::{decode_from_slice, encode_to_vec}; + + use super::*; + + #[test] + fn test_roundtrip() { + let cfg = bincode::config::standard(); + + #[derive(Encode, Decode)] + struct Wrapper(#[bincode(with = "crate::serde_json")] Value); + + let value1 = Wrapper(json!({ + "key1": [1, 2, 3], + "key2": [4, 5, 6] + })); + + let value2: Wrapper = decode_from_slice(&encode_to_vec(&value1, cfg).unwrap(), cfg) + .unwrap() + .0; + + assert_eq!(value1.0, value2.0); + } + } +} + +pub mod either { + use ::either::Either; + + use super::*; + + pub fn encode( + value: &Either, + encoder: &mut E, + ) -> Result<(), EncodeError> { + value.is_left().encode(encoder)?; + ::either::for_both!(value, v => Encode::encode(v, encoder)) + } + + pub fn decode< + Context, + D: Decoder, + L: Decode, + R: Decode, + >( + decoder: &mut D, + ) -> Result, DecodeError> { + let is_left = bool::decode(decoder)?; + Ok(if is_left { + Either::Left(L::decode(decoder)?) + } else { + Either::Right(R::decode(decoder)?) + }) + } + + pub fn borrow_decode< + 'de, + Context, + D: BorrowDecoder<'de, Context = Context>, + L: BorrowDecode<'de, Context>, + R: BorrowDecode<'de, Context>, + >( + decoder: &mut D, + ) -> Result, DecodeError> { + let is_left = bool::borrow_decode(decoder)?; + Ok(if is_left { + Either::Left(L::borrow_decode(decoder)?) + } else { + Either::Right(R::borrow_decode(decoder)?) + }) + } + + #[cfg(test)] + mod tests { + use bincode::{decode_from_slice, encode_to_vec}; + + use super::*; + + #[derive(Encode, Decode)] + struct Wrapper(#[bincode(with = "crate::either")] Either); + + #[test] + fn test_roundtrip_left() { + let cfg = bincode::config::standard(); + + let either1 = Wrapper(Either::Left("hello".to_string())); + + let either2: Wrapper = decode_from_slice(&encode_to_vec(&either1, cfg).unwrap(), cfg) + .unwrap() + .0; + + assert_eq!(either1.0, either2.0); + } + + #[test] + fn test_roundtrip_right() { + let cfg = bincode::config::standard(); + + let either1 = Wrapper(Either::Right(42u32)); + + let either2: Wrapper = decode_from_slice(&encode_to_vec(&either1, cfg).unwrap(), cfg) + .unwrap() + .0; + + assert_eq!(either1.0, either2.0); + } + } +} From 9ed4581c8e54705187ab6b037460739f1ca8b54f Mon Sep 17 00:00:00 2001 From: Benjamin Woodruff Date: Fri, 28 Nov 2025 20:33:10 -0500 Subject: [PATCH 2/2] Add implementations for smallvec and ringmap --- Cargo.lock | 2 + turbopack/crates/turbo-bincode/Cargo.toml | 2 + turbopack/crates/turbo-bincode/src/lib.rs | 185 ++++++++++++++++++++++ 3 files changed, 189 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 2245c9c1d20aee..7e754fd2d922a4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9104,8 +9104,10 @@ dependencies = [ "either", "indexmap 2.9.0", "mime", + "ringmap", "serde", "serde_json", + "smallvec", ] [[package]] diff --git a/turbopack/crates/turbo-bincode/Cargo.toml b/turbopack/crates/turbo-bincode/Cargo.toml index cf18c496c513f9..abac7b2d57c910 100644 --- a/turbopack/crates/turbo-bincode/Cargo.toml +++ b/turbopack/crates/turbo-bincode/Cargo.toml @@ -15,5 +15,7 @@ bincode = { workspace = true } either = { workspace = true } indexmap = { workspace = true } mime = { workspace = true } +ringmap = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +smallvec = { workspace = true } diff --git a/turbopack/crates/turbo-bincode/src/lib.rs b/turbopack/crates/turbo-bincode/src/lib.rs index 24965e39a25f4d..a6569d1e873ae0 100644 --- a/turbopack/crates/turbo-bincode/src/lib.rs +++ b/turbopack/crates/turbo-bincode/src/lib.rs @@ -1,3 +1,4 @@ +use ::smallvec::SmallVec; use bincode::{ BorrowDecode, Decode, Encode, de::{BorrowDecoder, Decoder}, @@ -162,6 +163,83 @@ pub mod indexset { } } +pub mod ringset { + use std::hash::{BuildHasher, Hash}; + + use ::ringmap::RingSet; + + use super::*; + + pub fn encode(set: &RingSet, encoder: &mut E) -> Result<(), EncodeError> + where + E: Encoder, + T: Encode, + { + usize::encode(&set.len(), encoder)?; + for item in set { + T::encode(item, encoder)?; + } + Ok(()) + } + + pub fn decode(decoder: &mut D) -> Result, DecodeError> + where + D: Decoder, + T: Decode + Eq + Hash, + S: BuildHasher + Default, + { + let len = usize::decode(decoder)?; + let mut set = RingSet::with_capacity_and_hasher(len, Default::default()); + for _i in 0..len { + set.insert(T::decode(decoder)?); + } + Ok(set) + } + + pub fn borrow_decode<'de, Context, D, T, S>( + decoder: &mut D, + ) -> Result, DecodeError> + where + D: BorrowDecoder<'de, Context = Context>, + T: BorrowDecode<'de, Context> + Eq + Hash, + S: BuildHasher + Default, + { + let len = usize::decode(decoder)?; + let mut set = RingSet::with_capacity_and_hasher(len, Default::default()); + for _i in 0..len { + set.insert(T::borrow_decode(decoder)?); + } + Ok(set) + } + + #[cfg(test)] + mod tests { + use bincode::{decode_from_slice, encode_to_vec}; + + use super::*; + + #[test] + fn test_roundtrip() { + let cfg = bincode::config::standard(); + + #[derive(Encode, Decode)] + struct Wrapper(#[bincode(with = "crate::ringset")] RingSet); + + let set1 = Wrapper(RingSet::from([ + "value1".to_string(), + "value2".to_string(), + "value3".to_string(), + ])); + + let set2: Wrapper = decode_from_slice(&encode_to_vec(&set1, cfg).unwrap(), cfg) + .unwrap() + .0; + + assert_eq!(set1.0, set2.0); + } + } +} + pub mod mime_option { use std::str::FromStr; @@ -381,3 +459,110 @@ pub mod either { } } } + +pub mod smallvec { + use ::smallvec::Array; + + use super::*; + + pub fn encode>( + vec: &SmallVec, + encoder: &mut E, + ) -> Result<(), EncodeError> { + usize::encode(&vec.len(), encoder)?; + for item in vec { + Encode::encode(item, encoder)?; + } + Ok(()) + } + + pub fn decode, A: Array>>( + decoder: &mut D, + ) -> Result, DecodeError> { + let len = usize::decode(decoder)?; + let mut vec = SmallVec::with_capacity(len); + for _ in 0..len { + vec.push(Decode::decode(decoder)?); + } + Ok(vec) + } + + pub fn borrow_decode< + 'de, + Context, + D: BorrowDecoder<'de, Context = Context>, + A: Array>, + >( + decoder: &mut D, + ) -> Result, DecodeError> { + let len = usize::decode(decoder)?; + let mut vec = SmallVec::with_capacity(len); + for _ in 0..len { + vec.push(BorrowDecode::borrow_decode(decoder)?); + } + Ok(vec) + } + + #[cfg(test)] + mod tests { + use bincode::{decode_from_slice, encode_to_vec}; + + use super::*; + + #[test] + fn test_roundtrip() { + let cfg = bincode::config::standard(); + + #[derive(Encode, Decode)] + struct Wrapper(#[bincode(with = "crate::smallvec")] SmallVec<[u32; 4]>); + + let vec1 = Wrapper(SmallVec::from_slice(&[1u32, 2, 3, 4, 5])); + + let vec2: Wrapper = decode_from_slice(&encode_to_vec(&vec1, cfg).unwrap(), cfg) + .unwrap() + .0; + + assert_eq!(vec1.0, vec2.0); + } + } +} + +pub mod owned_cow { + //! Overrides the default [`BorrowDecode`] implementation to always use the owned representation + //! of [`Cow`], so that the resulting [`BorrowDecode`] type is independent of the [`Cow`]'s + //! lifetime. + + use std::borrow::Cow; + + use super::*; + + #[allow(clippy::ptr_arg)] + pub fn encode(cow: &Cow<'_, T>, encoder: &mut E) -> Result<(), EncodeError> + where + E: Encoder, + T: ToOwned + ?Sized, + for<'a> &'a T: Encode, + { + cow.encode(encoder) + } + + pub fn decode<'cow, Context, D, T>(decoder: &mut D) -> Result, DecodeError> + where + D: Decoder, + T: ToOwned + ?Sized, + ::Owned: Decode, + { + Decode::decode(decoder) + } + + pub fn borrow_decode<'de, 'cow, Context, D, T>( + decoder: &mut D, + ) -> Result, DecodeError> + where + D: BorrowDecoder<'de, Context = Context>, + T: ToOwned + ?Sized, + ::Owned: Decode, + { + Decode::decode(decoder) + } +}