Skip to content

Commit 80594ff

Browse files
committed
NIXL data passing, install ffmpeg
Signed-off-by: Alexandre Milesi <[email protected]>
1 parent a0f0e5a commit 80594ff

File tree

11 files changed

+126
-30
lines changed

11 files changed

+126
-30
lines changed

container/Dockerfile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,11 @@ RUN apt-get update -y \
125125
clang \
126126
libclang-dev \
127127
protobuf-compiler \
128+
# media-loading rust build+runtime dependencies
129+
libavcodec-dev \
130+
libavutil-dev \
131+
libavformat-dev \
132+
pkg-config \
128133
&& apt-get clean \
129134
&& rm -rf /var/lib/apt/lists/*
130135

container/Dockerfile.vllm

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,11 @@ ARG WORKSPACE_DIR=/workspace
305305
RUN apt-get update -y && \
306306
apt-get install -y --no-install-recommends \
307307
# Install utilities
308+
wget \
309+
&& rm -f /etc/apt/sources.list.d/cuda*.list \
310+
&& wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb && dpkg -i cuda-keyring_1.1-1_all.deb && \
311+
apt-get update -y && \
312+
apt-get install -y --no-install-recommends \
308313
nvtop \
309314
wget \
310315
tmux \

lib/bindings/python/src/dynamo/nixl_connect/__init__.py

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from __future__ import annotations
1717

1818
import asyncio
19+
import base64
1920
import logging
2021
import socket
2122
import uuid
@@ -1211,7 +1212,7 @@ def metadata(self) -> RdmaMetadata:
12111212

12121213
self._serialized_request = RdmaMetadata(
12131214
descriptors=descriptors,
1214-
nixl_metadata=nixl_metadata.hex(),
1215+
nixl_metadata=base64.b64encode(nixl_metadata).decode("utf-8"),
12151216
notification_key=self._notification_key,
12161217
operation_kind=int(self._operation_kind),
12171218
)
@@ -1471,13 +1472,18 @@ def __init__(
14711472
self._connector = connector
14721473

14731474
# When `nixl_metadata` is a string, it is assumed to have come from a remote worker
1474-
# via a `RdmaMetadata` object and therefore can assumed be a hex-encoded, compressed
1475+
# via a `RdmaMetadata` object and therefore can assumed be a base64-encoded, compressed
14751476
# representation of the NIXL metadata.
14761477
if isinstance(nixl_metadata, str):
1477-
# Decode the hex-encoded string into bytes.
1478-
nixl_metadata = bytes.fromhex(nixl_metadata)
1479-
# Decompress the NIXL metadata.
1480-
nixl_metadata = zlib.decompress(nixl_metadata)
1478+
# Decode the base64-encoded string into bytes.
1479+
nixl_metadata = base64.b64decode(nixl_metadata)
1480+
try:
1481+
# Decompress the NIXL metadata.
1482+
nixl_metadata = zlib.decompress(nixl_metadata)
1483+
except zlib.error:
1484+
# we accept cases when the data might not be compressed
1485+
if nixl_metadata[0] == 0x78 and nixl_metadata[1] in (0x01, 0x5E, 0x9C, 0xDA):
1486+
raise
14811487

14821488
self._name = connector._nixl.add_remote_agent(nixl_metadata)
14831489
if isinstance(self._name, bytes):

lib/llm/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ testing-etcd = []
2424
block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:ndarray", "dep:nix"]
2525
cuda = ["dep:cudarc"]
2626
integration = ["dynamo-runtime/integration"]
27-
media-loading = ["dep:ndarray", "dep:video-rs", "dep:image", "dep:reqwest", "dep:base64", "dep:tokio-rayon"]
27+
media-loading = ["dep:ndarray", "dep:video-rs", "dep:image", "dep:reqwest", "dep:base64", "dep:tokio-rayon", "block-manager"]
2828

2929
[[bench]]
3030
name = "tokenizer"

lib/llm/src/block_manager/storage.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,30 @@ impl SystemStorage {
364364
}
365365
}
366366

367+
impl TryFrom<Vec<u8>> for SystemStorage {
368+
type Error = StorageError;
369+
370+
/// Create SystemStorage from an existing Vec<u8>
371+
/// Takes ownership of the Vec and uses its memory directly (zero-copy)
372+
fn try_from(mut vec: Vec<u8>) -> Result<Self, Self::Error> {
373+
let size = vec.len();
374+
let layout =
375+
Layout::array::<u8>(size).map_err(|e| StorageError::AllocationFailed(e.to_string()))?;
376+
let ptr = NonNull::new(vec.as_mut_ptr())
377+
.ok_or_else(|| StorageError::AllocationFailed("vec pointer is null".into()))?;
378+
379+
// prevents Vec from freeing the memory
380+
std::mem::forget(vec);
381+
382+
Ok(Self {
383+
ptr,
384+
layout,
385+
len: size,
386+
handles: RegistrationHandles::new(),
387+
})
388+
}
389+
}
390+
367391
impl Drop for SystemStorage {
368392
fn drop(&mut self) {
369393
self.handles.release();

lib/llm/src/preprocessor.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -315,11 +315,11 @@ impl OpenAIPreprocessor {
315315
_ => continue,
316316
};
317317

318-
let map_item = media_map.entry(type_str.clone()).or_insert_with(Vec::new);
318+
let map_item = media_map.entry(type_str.clone()).or_default();
319319

320320
if let Some(loader) = &self.media_loader {
321-
let decoded_data = loader.fetch_and_decode_media_part(content_part).await?;
322-
map_item.push(MultimodalData::Decoded(decoded_data));
321+
let rdma_descriptor = loader.fetch_and_decode_media_part(content_part).await?;
322+
map_item.push(MultimodalData::Decoded(rdma_descriptor));
323323
} else {
324324
map_item.push(MultimodalData::Url(url));
325325
}

lib/llm/src/preprocessor/media/common.rs

Lines changed: 71 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,45 @@
44
use anyhow::Result;
55
use base64::{Engine as _, engine::general_purpose};
66
use ndarray::{ArrayBase, Dimension, OwnedRepr};
7+
use serde::{Deserialize, Serialize};
8+
use std::sync::Arc;
79

810
use dynamo_async_openai::types::ChatCompletionRequestUserMessageContentPart;
911

12+
use crate::block_manager::storage::{
13+
StorageError, SystemStorage, nixl::NixlRegisterableStorage, nixl::NixlStorage,
14+
};
1015
use crate::preprocessor::media::{ImageDecoder, VideoDecoder};
16+
use nixl_sys::Agent as NixlAgent;
1117

1218
// Raw encoded media data (.png, .mp4, ...), optionally b64-encoded
1319
pub struct EncodedMediaData {
1420
bytes: Vec<u8>,
1521
b64_encoded: bool,
1622
}
1723

24+
// Decoded media data (image RGB, video frames pixels, ...)
25+
pub struct DecodedMediaData {
26+
data: SystemStorage,
27+
shape: Vec<usize>,
28+
dtype: String,
29+
}
30+
31+
// Decoded media data NIXL descriptor (sent to the next step in the pipeline / NATS)
32+
#[derive(Serialize, Deserialize, Clone, Debug)]
33+
pub struct RdmaMediaDataDescriptor {
34+
// b64 agent metadata
35+
nixl_metadata: String,
36+
// tensor descriptor
37+
nixl_descriptor: NixlStorage,
38+
shape: Vec<usize>,
39+
dtype: String,
40+
// reference to the actual data, kept alive while the rdma descriptor is alive
41+
#[serde(skip, default)]
42+
#[allow(dead_code)]
43+
source_storage: Option<Arc<SystemStorage>>,
44+
}
45+
1846
impl EncodedMediaData {
1947
// Handles both web URLs (will download the bytes) and data URLs (will keep b64-encoded)
2048
// This function is kept in tokio runtime so we do not want any expensive operations
@@ -56,25 +84,42 @@ impl EncodedMediaData {
5684
}
5785
}
5886

59-
// Decoded media data (image RGB, video frames pixels, ...)
60-
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
61-
pub struct DecodedMediaData {
62-
data: Vec<u8>,
63-
shape: Vec<usize>,
64-
dtype: String,
87+
impl DecodedMediaData {
88+
pub fn into_rdma_descriptor(self, nixl_agent: &NixlAgent) -> Result<RdmaMediaDataDescriptor> {
89+
// get NIXL metadata and descriptor
90+
let mut source_storage = self.data;
91+
source_storage.nixl_register(nixl_agent, None)?;
92+
let nixl_descriptor = unsafe { source_storage.as_nixl_descriptor() }
93+
.ok_or_else(|| anyhow::anyhow!("Cannot convert storage to NIXL descriptor"))?;
94+
95+
// TODO: cache this if this is constant across the worker lifetime?
96+
let nixl_local_md = nixl_agent.get_local_md()?;
97+
let nixl_metadata = general_purpose::STANDARD.encode(&nixl_local_md);
98+
99+
Ok(RdmaMediaDataDescriptor {
100+
nixl_metadata,
101+
nixl_descriptor,
102+
shape: self.shape,
103+
dtype: self.dtype,
104+
// do not drop / free the storage yet
105+
source_storage: Some(Arc::new(source_storage)),
106+
})
107+
}
65108
}
66109

67110
// convert Array{N}<u8> to DecodedMediaData
68111
// TODO: Array1<f32> for audio
69-
impl<D: Dimension> From<ArrayBase<OwnedRepr<u8>, D>> for DecodedMediaData {
70-
fn from(array: ArrayBase<OwnedRepr<u8>, D>) -> Self {
112+
impl<D: Dimension> TryFrom<ArrayBase<OwnedRepr<u8>, D>> for DecodedMediaData {
113+
type Error = StorageError;
114+
115+
fn try_from(array: ArrayBase<OwnedRepr<u8>, D>) -> Result<Self, Self::Error> {
71116
let shape = array.shape().to_vec();
72117
let (data, _) = array.into_raw_vec_and_offset();
73-
Self {
74-
data,
118+
Ok(Self {
119+
data: SystemStorage::try_from(data)?,
75120
shape,
76121
dtype: "uint8".to_string(),
77-
}
122+
})
78123
}
79124
}
80125

@@ -102,28 +147,36 @@ pub struct MediaDecoder {
102147
pub struct MediaLoader {
103148
media_decoder: MediaDecoder,
104149
http_client: reqwest::Client,
150+
nixl_agent: NixlAgent,
105151
}
106152

107153
impl MediaLoader {
108154
pub fn new(media_decoder: MediaDecoder) -> Result<Self> {
109155
let http_client = reqwest::Client::builder()
110156
.user_agent(
111-
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:143.0) Gecko/20100101 Firefox/143.0",
157+
"dynamo-ai/dynamo", // TODO: use a proper user agent
112158
)
113159
.build()?;
114160

161+
let uuid = uuid::Uuid::new_v4();
162+
let nixl_agent = NixlAgent::new(&format!("media-loader-{}", uuid))?;
163+
let (_, ucx_params) = nixl_agent.get_plugin_params("UCX")?;
164+
nixl_agent.create_backend("UCX", &ucx_params)?;
165+
115166
Ok(Self {
116167
media_decoder,
117168
http_client,
169+
nixl_agent,
118170
})
119171
}
120172

121173
pub async fn fetch_and_decode_media_part(
122174
&self,
123175
oai_content_part: &ChatCompletionRequestUserMessageContentPart,
124-
) -> Result<DecodedMediaData> {
176+
) -> Result<RdmaMediaDataDescriptor> {
125177
// TODO: request-level options
126-
match oai_content_part {
178+
// fetch and decode the media
179+
let decoded = match oai_content_part {
127180
ChatCompletionRequestUserMessageContentPart::ImageUrl(image_part) => {
128181
let url = &image_part.image_url.url;
129182
let data = EncodedMediaData::from_url(url, &self.http_client).await?;
@@ -138,6 +191,9 @@ impl MediaLoader {
138191
anyhow::bail!("Audio decoding is not supported yet");
139192
}
140193
_ => anyhow::bail!("Unsupported media type"),
141-
}
194+
}?;
195+
196+
let rdma_descriptor = decoded.into_rdma_descriptor(&self.nixl_agent)?;
197+
Ok(rdma_descriptor)
142198
}
143199
}

lib/llm/src/preprocessor/media/image.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,6 @@ impl Decoder for ImageDecoder {
3636
};
3737
let shape = (height as usize, width as usize, n_channels as usize);
3838
let array = Array3::from_shape_vec(shape, data)?;
39-
Ok(array.into())
39+
Ok(array.try_into()?)
4040
}
4141
}

lib/llm/src/preprocessor/media/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,6 @@ mod common;
55
mod image;
66
mod video;
77

8-
pub use common::{DecodedMediaData, Decoder, EncodedMediaData, MediaDecoder, MediaLoader};
8+
pub use common::{Decoder, EncodedMediaData, MediaDecoder, MediaLoader, RdmaMediaDataDescriptor};
99
pub use image::ImageDecoder;
1010
pub use video::VideoDecoder;

lib/llm/src/preprocessor/media/video.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,6 @@ impl Decoder for VideoDecoder {
111111
3,
112112
);
113113
let array = Array4::from_shape_vec(shape, all_frames)?;
114-
Ok(array.into())
114+
Ok(array.try_into()?)
115115
}
116116
}

0 commit comments

Comments
 (0)