Skip to content

Commit 7ca3075

Browse files
committed
chore: Cleanup restructure
Signed-off-by: Alexandre Milesi <[email protected]>
1 parent 72ca692 commit 7ca3075

File tree

11 files changed, with 197 additions and 158 deletions.

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

container/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -303,7 +303,7 @@ ENV CARGO_BUILD_JOBS=${CARGO_BUILD_JOBS:-16} \
303303
# Install system dependencies
304304
RUN dnf install -y https://download1.rpmfusion.org/free/el/rpmfusion-free-release-8.noarch.rpm && dnf install -y https://download1.rpmfusion.org/nonfree/el/rpmfusion-nonfree-release-8.noarch.rpm
305305
RUN dnf update -y \
306-
&& dnf install -y llvm-toolset protobuf-compiler wget unzip ffmpeg-devel \
306+
&& dnf install -y llvm-toolset protobuf-compiler wget unzip libavdevice-dev libavutil-dev libavcodec-dev libavformat-dev pkg-config \
307307
&& dnf clean all \
308308
&& rm -rf /var/cache/dnf
309309

lib/llm/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ testing-etcd = []
2424
block-manager = ["dep:nixl-sys", "dep:cudarc", "dep:ndarray", "dep:nix"]
2525
cuda = ["dep:cudarc"]
2626
integration = ["dynamo-runtime/integration"]
27-
media-loading = ["dep:ndarray", "dep:video-rs", "dep:image", "dep:reqwest", "dep:base64", "dep:tokio-rayon", "block-manager"]
27+
media-loading = ["dep:ndarray", "dep:video-rs", "dep:image", "dep:reqwest", "dep:base64", "dep:tokio-rayon", "dep:flate2", "block-manager"]
2828

2929
[[bench]]
3030
name = "tokenizer"
@@ -145,6 +145,7 @@ image = { version = "0.25", optional = true }
145145
reqwest = { workspace = true, optional = true }
146146
base64 = { version = "0.22", optional = true }
147147
tokio-rayon = {version = "2", optional = true }
148+
flate2 = { version = "1.1.2", optional = true }
148149

149150
# Publishers
150151
zeromq = "0.4.1"

lib/llm/src/mocker/engine.rs

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -312,7 +312,6 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<LLMEngineOutput>, Error>
312312
) -> Result<ManyOut<LLMEngineOutput>, Error> {
313313
let (request, ctx) = input.into_parts();
314314

315-
println!("multi_modal_data: {:?}", request.multi_modal_data);
316315
// Extract dp_rank from request field (defaults to 0 if not set)
317316
let dp_rank = request.dp_rank.unwrap_or(0);
318317

lib/llm/src/preprocessor/media/common.rs

Lines changed: 0 additions & 148 deletions
Original file line number | Diff line number | Diff line change
@@ -3,17 +3,6 @@
33

44
use anyhow::Result;
55
use base64::{Engine as _, engine::general_purpose};
6-
use ndarray::{ArrayBase, Dimension, OwnedRepr};
7-
use serde::{Deserialize, Serialize};
8-
use std::sync::Arc;
9-
10-
use dynamo_async_openai::types::ChatCompletionRequestUserMessageContentPart;
11-
12-
use crate::block_manager::storage::{
13-
StorageError, SystemStorage, nixl::NixlRegisterableStorage, nixl::NixlStorage,
14-
};
15-
use crate::preprocessor::media::{ImageDecoder, VideoDecoder};
16-
use nixl_sys::Agent as NixlAgent;
176

187
// Raw encoded media data (.png, .mp4, ...), optionally b64-encoded
198
#[derive(Debug)]
@@ -22,29 +11,6 @@ pub struct EncodedMediaData {
2211
pub(crate) b64_encoded: bool,
2312
}
2413

25-
// Decoded media data (image RGB, video frames pixels, ...)
26-
#[derive(Debug)]
27-
pub struct DecodedMediaData {
28-
pub(crate) data: SystemStorage,
29-
pub(crate) shape: Vec<usize>,
30-
pub(crate) dtype: String,
31-
}
32-
33-
// Decoded media data NIXL descriptor (sent to the next step in the pipeline / NATS)
34-
#[derive(Serialize, Deserialize, Clone, Debug)]
35-
pub struct RdmaMediaDataDescriptor {
36-
// b64 agent metadata
37-
nixl_metadata: String,
38-
// tensor descriptor
39-
nixl_descriptor: NixlStorage,
40-
shape: Vec<usize>,
41-
dtype: String,
42-
// reference to the actual data, kept alive while the rdma descriptor is alive
43-
#[serde(skip, default)]
44-
#[allow(dead_code)]
45-
source_storage: Option<Arc<SystemStorage>>,
46-
}
47-
4814
impl EncodedMediaData {
4915
// Handles both web URLs (will download the bytes) and data URLs (will keep b64-encoded)
5016
// This function is kept in tokio runtime so we do not want any expensive operations
@@ -86,120 +52,6 @@ impl EncodedMediaData {
8652
}
8753
}
8854

89-
impl DecodedMediaData {
90-
pub fn into_rdma_descriptor(self, nixl_agent: &NixlAgent) -> Result<RdmaMediaDataDescriptor> {
91-
// get NIXL metadata and descriptor
92-
let mut source_storage = self.data;
93-
source_storage.nixl_register(nixl_agent, None)?;
94-
let nixl_descriptor = unsafe { source_storage.as_nixl_descriptor() }
95-
.ok_or_else(|| anyhow::anyhow!("Cannot convert storage to NIXL descriptor"))?;
96-
97-
// TODO: cache this if this is constant across the worker lifetime?
98-
let nixl_local_md = nixl_agent.get_local_md()?;
99-
let nixl_metadata = general_purpose::STANDARD.encode(&nixl_local_md);
100-
101-
Ok(RdmaMediaDataDescriptor {
102-
nixl_metadata,
103-
nixl_descriptor,
104-
shape: self.shape,
105-
dtype: self.dtype,
106-
// do not drop / free the storage yet
107-
source_storage: Some(Arc::new(source_storage)),
108-
})
109-
}
110-
}
111-
112-
// convert Array{N}<u8> to DecodedMediaData
113-
// TODO: Array1<f32> for audio
114-
impl<D: Dimension> TryFrom<ArrayBase<OwnedRepr<u8>, D>> for DecodedMediaData {
115-
type Error = StorageError;
116-
117-
fn try_from(array: ArrayBase<OwnedRepr<u8>, D>) -> Result<Self, Self::Error> {
118-
let shape = array.shape().to_vec();
119-
let (data, _) = array.into_raw_vec_and_offset();
120-
Ok(Self {
121-
data: SystemStorage::try_from(data)?,
122-
shape,
123-
dtype: "uint8".to_string(),
124-
})
125-
}
126-
}
127-
128-
#[async_trait::async_trait]
129-
pub trait Decoder: Clone + Send + 'static {
130-
fn decode(&self, data: EncodedMediaData) -> Result<DecodedMediaData>;
131-
132-
async fn decode_async(&self, data: EncodedMediaData) -> Result<DecodedMediaData> {
133-
// light clone (only config params)
134-
let decoder = self.clone();
135-
// compute heavy -> rayon
136-
let result = tokio_rayon::spawn(move || decoder.decode(data)).await?;
137-
Ok(result)
138-
}
139-
}
140-
141-
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
142-
pub struct MediaDecoder {
143-
#[serde(default)]
144-
pub image_decoder: ImageDecoder,
145-
#[serde(default)]
146-
pub video_decoder: VideoDecoder,
147-
}
148-
149-
pub struct MediaLoader {
150-
media_decoder: MediaDecoder,
151-
http_client: reqwest::Client,
152-
nixl_agent: NixlAgent,
153-
}
154-
155-
impl MediaLoader {
156-
pub fn new(media_decoder: MediaDecoder) -> Result<Self> {
157-
let http_client = reqwest::Client::builder()
158-
.user_agent(
159-
"dynamo-ai/dynamo", // TODO: use a proper user agent
160-
)
161-
.build()?;
162-
163-
let uuid = uuid::Uuid::new_v4();
164-
let nixl_agent = NixlAgent::new(&format!("media-loader-{}", uuid))?;
165-
let (_, ucx_params) = nixl_agent.get_plugin_params("UCX")?;
166-
nixl_agent.create_backend("UCX", &ucx_params)?;
167-
168-
Ok(Self {
169-
media_decoder,
170-
http_client,
171-
nixl_agent,
172-
})
173-
}
174-
175-
pub async fn fetch_and_decode_media_part(
176-
&self,
177-
oai_content_part: &ChatCompletionRequestUserMessageContentPart,
178-
) -> Result<RdmaMediaDataDescriptor> {
179-
// TODO: request-level options
180-
// fetch and decode the media
181-
let decoded = match oai_content_part {
182-
ChatCompletionRequestUserMessageContentPart::ImageUrl(image_part) => {
183-
let url = &image_part.image_url.url;
184-
let data = EncodedMediaData::from_url(url, &self.http_client).await?;
185-
self.media_decoder.image_decoder.decode_async(data).await
186-
}
187-
ChatCompletionRequestUserMessageContentPart::VideoUrl(video_part) => {
188-
let url = &video_part.video_url.url;
189-
let data = EncodedMediaData::from_url(url, &self.http_client).await?;
190-
self.media_decoder.video_decoder.decode_async(data).await
191-
}
192-
ChatCompletionRequestUserMessageContentPart::AudioUrl(_) => {
193-
anyhow::bail!("Audio decoding is not supported yet");
194-
}
195-
_ => anyhow::bail!("Unsupported media type"),
196-
}?;
197-
198-
let rdma_descriptor = decoded.into_rdma_descriptor(&self.nixl_agent)?;
199-
Ok(rdma_descriptor)
200-
}
201-
}
202-
20355
#[cfg(test)]
20456
mod tests {
20557
use super::*;

lib/llm/src/preprocessor/media/image.rs renamed to lib/llm/src/preprocessor/media/decoders/image.rs

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,9 @@ use anyhow::Result;
55
use image::GenericImageView;
66
use ndarray::Array3;
77

8-
use super::common::{DecodedMediaData, Decoder, EncodedMediaData};
8+
use super::super::common::EncodedMediaData;
9+
use super::super::rdma::DecodedMediaData;
10+
use super::Decoder;
911

1012
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
1113
#[serde(deny_unknown_fields)]
lib/llm/src/preprocessor/media/decoders/mod.rs

Lines changed: 34 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,34 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
use anyhow::Result;
5+
6+
use super::common::EncodedMediaData;
7+
use super::rdma::DecodedMediaData;
8+
9+
mod image;
10+
mod video;
11+
12+
pub use image::ImageDecoder;
13+
pub use video::VideoDecoder;
14+
15+
#[async_trait::async_trait]
16+
pub trait Decoder: Clone + Send + 'static {
17+
fn decode(&self, data: EncodedMediaData) -> Result<DecodedMediaData>;
18+
19+
async fn decode_async(&self, data: EncodedMediaData) -> Result<DecodedMediaData> {
20+
// light clone (only config params)
21+
let decoder = self.clone();
22+
// compute heavy -> rayon
23+
let result = tokio_rayon::spawn(move || decoder.decode(data)).await?;
24+
Ok(result)
25+
}
26+
}
27+
28+
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
29+
pub struct MediaDecoder {
30+
#[serde(default)]
31+
pub image_decoder: ImageDecoder,
32+
#[serde(default)]
33+
pub video_decoder: VideoDecoder,
34+
}

lib/llm/src/preprocessor/media/video.rs renamed to lib/llm/src/preprocessor/media/decoders/video.rs

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
11
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
22
// SPDX-License-Identifier: Apache-2.0
33

4-
use super::common::{DecodedMediaData, Decoder, EncodedMediaData};
4+
use super::super::common::EncodedMediaData;
5+
use super::super::rdma::DecodedMediaData;
6+
use super::Decoder;
57
use anyhow::Result;
68
use ndarray::Array4;
79
use std::io::Write;
lib/llm/src/preprocessor/media/loader.rs

Lines changed: 65 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,65 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
use anyhow::Result;
5+
6+
use dynamo_async_openai::types::ChatCompletionRequestUserMessageContentPart;
7+
8+
use super::common::EncodedMediaData;
9+
use super::decoders::{Decoder, MediaDecoder};
10+
use super::rdma::RdmaMediaDataDescriptor;
11+
use nixl_sys::Agent as NixlAgent;
12+
13+
pub struct MediaLoader {
14+
media_decoder: MediaDecoder,
15+
http_client: reqwest::Client,
16+
nixl_agent: NixlAgent,
17+
}
18+
19+
impl MediaLoader {
20+
pub fn new(media_decoder: MediaDecoder) -> Result<Self> {
21+
let http_client = reqwest::Client::builder()
22+
.user_agent(
23+
"dynamo-ai/dynamo", // TODO: use a proper user agent
24+
)
25+
.build()?;
26+
27+
let uuid = uuid::Uuid::new_v4();
28+
let nixl_agent = NixlAgent::new(&format!("media-loader-{}", uuid))?;
29+
let (_, ucx_params) = nixl_agent.get_plugin_params("UCX")?;
30+
nixl_agent.create_backend("UCX", &ucx_params)?;
31+
32+
Ok(Self {
33+
media_decoder,
34+
http_client,
35+
nixl_agent,
36+
})
37+
}
38+
39+
pub async fn fetch_and_decode_media_part(
40+
&self,
41+
oai_content_part: &ChatCompletionRequestUserMessageContentPart,
42+
) -> Result<RdmaMediaDataDescriptor> {
43+
// TODO: request-level options
44+
// fetch and decode the media
45+
let decoded = match oai_content_part {
46+
ChatCompletionRequestUserMessageContentPart::ImageUrl(image_part) => {
47+
let url = &image_part.image_url.url;
48+
let data = EncodedMediaData::from_url(url, &self.http_client).await?;
49+
self.media_decoder.image_decoder.decode_async(data).await
50+
}
51+
ChatCompletionRequestUserMessageContentPart::VideoUrl(video_part) => {
52+
let url = &video_part.video_url.url;
53+
let data = EncodedMediaData::from_url(url, &self.http_client).await?;
54+
self.media_decoder.video_decoder.decode_async(data).await
55+
}
56+
ChatCompletionRequestUserMessageContentPart::AudioUrl(_) => {
57+
anyhow::bail!("Audio decoding is not supported yet");
58+
}
59+
_ => anyhow::bail!("Unsupported media type"),
60+
}?;
61+
62+
let rdma_descriptor = decoded.into_rdma_descriptor(&self.nixl_agent)?;
63+
Ok(rdma_descriptor)
64+
}
65+
}

lib/llm/src/preprocessor/media/mod.rs

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,9 +2,11 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
mod common;
5-
mod image;
6-
mod video;
5+
mod decoders;
6+
mod loader;
7+
mod rdma;
78

8-
pub use common::{Decoder, EncodedMediaData, MediaDecoder, MediaLoader, RdmaMediaDataDescriptor};
9-
pub use image::ImageDecoder;
10-
pub use video::VideoDecoder;
9+
pub use common::EncodedMediaData;
10+
pub use decoders::{Decoder, ImageDecoder, MediaDecoder, VideoDecoder};
11+
pub use loader::MediaLoader;
12+
pub use rdma::{DecodedMediaData, RdmaMediaDataDescriptor};

0 commit comments

Comments
 (0)