From ac1eebaa9088d0b159bfa21b23492d9e24b02ab8 Mon Sep 17 00:00:00 2001
From: Erik Svilich
Date: Tue, 5 May 2026 20:36:34 +0000
Subject: [PATCH 1/2] feat: add text/plain asset handler via c2pa-text crate

Add a TextIO asset handler that embeds and extracts C2PA JUMBF manifests
in plain text files using the c2pa-text crate. The crate encodes binary
manifest data as invisible Unicode Variation Selectors, following the
C2PA text embedding specification (Section A.7).

The handler implements CAIReader, CAIWriter, and AssetIO for stream- and
file-based read/write/remove support. It does not implement AssetPatch.
Hash object positions span the entire text content, with the embedded
manifest bytes reported as a separate Cai range so they can be excluded.

Writing or removing a manifest fails with InvalidAsset when an existing
wrapper cannot be decoded, instead of silently treating the damaged text
as clean.

Registers "txt" and "text/plain" as supported types in the MIME utility
and adds TextIO to the reader and writer handler maps, and to the
matching handler lists in the jumbf_io tests.

The c2pa-text reference implementation is at:
https://github.com/encypherai/c2pa-text
---
Note: text_io.rs was amended during review; the blob id on its index
line below predates the amendment.

 sdk/Cargo.toml                    |   1 +
 sdk/src/asset_handlers/mod.rs     |   1 +
 sdk/src/asset_handlers/text_io.rs | 321 ++++++++++++++++++++++++++++++
 sdk/src/jumbf_io.rs               |   6 +-
 sdk/src/utils/mime.rs             |   2 +
 5 files changed, 330 insertions(+), 1 deletion(-)
 create mode 100644 sdk/src/asset_handlers/text_io.rs

diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml
index d3b6eac72..2220dcb8f 100644
--- a/sdk/Cargo.toml
+++ b/sdk/Cargo.toml
@@ -109,6 +109,7 @@ byteorder = { version = "1.4.3", default-features = false }
 byteordered = "0.6.0"
 chrono = { version = "0.4.42", default-features = false, features = ["serde"] }
 c2pa_cbor = "0.77.2"
+c2pa-text = { git = "https://github.com/encypherai/c2pa-text.git" }
 config = { version = "0.14.0", default-features = false, features = [
     "json",
     "toml",
diff --git a/sdk/src/asset_handlers/mod.rs b/sdk/src/asset_handlers/mod.rs
index 69ba19f58..fda4b78f8 100644
--- a/sdk/src/asset_handlers/mod.rs
+++ b/sdk/src/asset_handlers/mod.rs
@@ -22,6 +22,7 @@ pub mod mp3_io;
 pub mod png_io;
 pub mod riff_io;
 pub mod svg_io;
+pub mod text_io;
 pub mod tiff_io;
 
 #[cfg(feature = "pdf")]
diff --git a/sdk/src/asset_handlers/text_io.rs b/sdk/src/asset_handlers/text_io.rs
new file mode 100644
index 000000000..bb91195b8
--- /dev/null
+++ b/sdk/src/asset_handlers/text_io.rs
@@ -0,0 +1,321 @@
+// Copyright 2024 Encypher Corporation. All rights reserved.
+// Licensed under the Apache License, Version 2.0 or the MIT license,
+// at your option.
+
+//! C2PA Text asset handler.
+//!
+//! Embeds and extracts C2PA JUMBF manifests in plain text using the c2pa-text
+//! crate, which encodes binary data as invisible Unicode Variation Selectors
+//! per the C2PA Text Embedding specification.
+
+use std::{fs::File, path::Path};
+
+use c2pa_text::{embed_manifest, extract_manifest};
+
+use crate::{
+    asset_io::{
+        rename_or_move, AssetIO, CAIRead, CAIReadWrite, CAIReader, CAIWriter,
+        HashBlockObjectType, HashObjectPositions,
+    },
+    error::{Error, Result},
+    utils::io_utils::tempfile_builder,
+};
+
+/// File extension and MIME type served by this handler.
+static SUPPORTED_TYPES: [&str; 2] = ["txt", "text/plain"];
+
+/// Asset handler for plain text assets, which must be valid UTF-8.
+pub struct TextIO {}
+
+/// Read the entire stream, from its start, as a UTF-8 string.
+///
+/// Fails with [`Error::InvalidAsset`] when the bytes are not valid UTF-8.
+fn stream_to_string(reader: &mut dyn CAIRead) -> Result<String> {
+    reader.rewind()?;
+    let mut buf = Vec::new();
+    reader.read_to_end(&mut buf)?;
+    String::from_utf8(buf)
+        .map_err(|_| Error::InvalidAsset("text asset is not valid UTF-8".to_string()))
+}
+
+/// Map a c2pa-text extraction failure to [`Error::InvalidAsset`].
+fn extraction_error(err: impl std::fmt::Display) -> Error {
+    Error::InvalidAsset(format!("text manifest extraction failed: {err}"))
+}
+
+impl CAIReader for TextIO {
+    /// Return the JUMBF manifest bytes embedded in the text.
+    ///
+    /// Fails with [`Error::JumbfNotFound`] when the text carries no manifest
+    /// and with [`Error::InvalidAsset`] when extraction fails.
+    fn read_cai(&self, reader: &mut dyn CAIRead) -> Result<Vec<u8>> {
+        let text = stream_to_string(reader)?;
+
+        extract_manifest(&text)
+            .map_err(extraction_error)?
+            .manifest
+            .ok_or(Error::JumbfNotFound)
+    }
+
+    fn read_xmp(&self, _reader: &mut dyn CAIRead) -> Option<String> {
+        // Text files do not carry XMP.
+        None
+    }
+}
+
+impl CAIWriter for TextIO {
+    /// Write the input text to `output_stream` with `store_bytes` embedded,
+    /// replacing any manifest wrapper already present.
+    ///
+    /// Fails with [`Error::InvalidAsset`] when an existing wrapper cannot be
+    /// decoded; embedding anyway would keep the damaged wrapper in the output
+    /// alongside the new one.
+    fn write_cai(
+        &self,
+        input_stream: &mut dyn CAIRead,
+        output_stream: &mut dyn CAIReadWrite,
+        store_bytes: &[u8],
+    ) -> Result<()> {
+        let text = stream_to_string(input_stream)?;
+
+        // Strip any existing wrapper to get clean text.
+        let clean = extract_manifest(&text)
+            .map_err(extraction_error)?
+            .clean_text;
+
+        let signed = embed_manifest(&clean, store_bytes);
+
+        output_stream.rewind()?;
+        output_stream.write_all(signed.as_bytes())?;
+        Ok(())
+    }
+
+    /// Partition the text's bytes for hashing: content before the manifest
+    /// wrapper, the wrapper itself (`Cai`), and any content after it.
+    ///
+    /// Fails with [`Error::JumbfNotFound`] when no wrapper is present.
+    fn get_object_locations_from_stream(
+        &self,
+        input_stream: &mut dyn CAIRead,
+    ) -> Result<Vec<HashObjectPositions>> {
+        let text = stream_to_string(input_stream)?;
+
+        let result = extract_manifest(&text).map_err(extraction_error)?;
+
+        // NOTE(review): offset/length are used as UTF-8 byte positions here;
+        // confirm against the c2pa-text documentation.
+        let (offset, length) = match (result.offset, result.length) {
+            (Some(o), Some(l)) => (o, l),
+            _ => return Err(Error::JumbfNotFound),
+        };
+
+        let total = text.len();
+        let end = offset + length;
+
+        let mut positions = vec![
+            // Pre-wrapper content
+            HashObjectPositions {
+                offset: 0,
+                length: offset,
+                htype: HashBlockObjectType::Other,
+            },
+            // C2PA wrapper (the variation-selector encoded block)
+            HashObjectPositions {
+                offset,
+                length,
+                htype: HashBlockObjectType::Cai,
+            },
+        ];
+
+        // Post-wrapper content (if any)
+        if end < total {
+            positions.push(HashObjectPositions {
+                offset: end,
+                length: total - end,
+                htype: HashBlockObjectType::Other,
+            });
+        }
+
+        Ok(positions)
+    }
+
+    /// Write the input text to `output_stream` with the manifest wrapper
+    /// stripped.
+    ///
+    /// Fails with [`Error::InvalidAsset`] when an existing wrapper cannot be
+    /// decoded, rather than reporting success while leaving it in place.
+    fn remove_cai_store_from_stream(
+        &self,
+        input_stream: &mut dyn CAIRead,
+        output_stream: &mut dyn CAIReadWrite,
+    ) -> Result<()> {
+        let text = stream_to_string(input_stream)?;
+
+        let clean = extract_manifest(&text)
+            .map_err(extraction_error)?
+            .clean_text;
+
+        output_stream.rewind()?;
+        output_stream.write_all(clean.as_bytes())?;
+        Ok(())
+    }
+}
+
+impl AssetIO for TextIO {
+    fn new(_asset_type: &str) -> Self
+    where
+        Self: Sized,
+    {
+        TextIO {}
+    }
+
+    fn get_handler(&self, asset_type: &str) -> Box<dyn AssetIO> {
+        Box::new(TextIO::new(asset_type))
+    }
+
+    fn get_reader(&self) -> &dyn CAIReader {
+        self
+    }
+
+    fn get_writer(&self, asset_type: &str) -> Option<Box<dyn CAIWriter>> {
+        Some(Box::new(TextIO::new(asset_type)))
+    }
+
+    fn read_cai_store(&self, asset_path: &Path) -> Result<Vec<u8>> {
+        let mut f = File::open(asset_path)?;
+        self.read_cai(&mut f)
+    }
+
+    /// Embed `store_bytes` into the file at `asset_path`. The result is
+    /// written to a temporary file that then replaces the original.
+    fn save_cai_store(&self, asset_path: &Path, store_bytes: &[u8]) -> Result<()> {
+        let mut input_stream = File::open(asset_path).map_err(Error::IoError)?;
+
+        let mut temp_file = tempfile_builder("c2pa_temp")?;
+
+        self.write_cai(&mut input_stream, &mut temp_file, store_bytes)?;
+
+        rename_or_move(temp_file, asset_path)
+    }
+
+    fn get_object_locations(&self, asset_path: &Path) -> Result<Vec<HashObjectPositions>> {
+        let mut input_stream = File::open(asset_path).map_err(|_| Error::EmbeddingError)?;
+        self.get_object_locations_from_stream(&mut input_stream)
+    }
+
+    /// Strip the manifest from the file at `asset_path`. The result is
+    /// written to a temporary file that then replaces the original.
+    fn remove_cai_store(&self, asset_path: &Path) -> Result<()> {
+        let mut input_file = File::open(asset_path)?;
+
+        let mut temp_file = tempfile_builder("c2pa_temp")?;
+
+        self.remove_cai_store_from_stream(&mut input_file, &mut temp_file)?;
+
+        rename_or_move(temp_file, asset_path)
+    }
+
+    fn supported_types(&self) -> &[&str] {
+        &SUPPORTED_TYPES
+    }
+}
+
+#[cfg(test)]
+#[cfg(feature = "file_io")]
+pub mod tests {
+    #![allow(clippy::expect_used)]
+    #![allow(clippy::unwrap_used)]
+
+    use std::io::Cursor;
+
+    use super::*;
+
+    /// Round-trip: write manifest into text, then read it back.
+    #[test]
+    fn text_io_stream_roundtrip() {
+        let plain = "Hello, C2PA world!";
+        let jumbf: Vec<u8> = vec![0, 0, 0, 8, b'j', b'u', b'm', b'b'];
+
+        // Build a signed text string via c2pa-text directly.
+        let signed = embed_manifest(plain, &jumbf);
+
+        // Read back through the handler.
+        let text_io = TextIO::new("txt");
+        let mut reader = Cursor::new(signed.clone().into_bytes());
+        let extracted = text_io.read_cai(&mut reader).expect("read_cai");
+        assert_eq!(extracted, jumbf);
+
+        // Write through the handler (replaces old manifest).
+        let new_jumbf: Vec<u8> = vec![0, 0, 0, 8, b'j', b'u', b'm', b'b', 0xAA];
+        let mut input = Cursor::new(signed.into_bytes());
+        let mut output = Cursor::new(Vec::new());
+        text_io
+            .write_cai(&mut input, &mut output, &new_jumbf)
+            .expect("write_cai");
+
+        // Read new manifest.
+        output.set_position(0);
+        let extracted2 = text_io.read_cai(&mut output).expect("read_cai after write");
+        assert_eq!(extracted2, new_jumbf);
+    }
+
+    /// Removing the CAI store produces clean text.
+    #[test]
+    fn text_io_remove_store() {
+        let plain = "Article body text.";
+        let jumbf: Vec<u8> = vec![0, 0, 0, 8, b'j', b'u', b'm', b'b'];
+        let signed = embed_manifest(plain, &jumbf);
+
+        let text_io = TextIO::new("txt");
+        let mut input = Cursor::new(signed.into_bytes());
+        let mut output = Cursor::new(Vec::new());
+        text_io
+            .remove_cai_store_from_stream(&mut input, &mut output)
+            .expect("remove_cai_store_from_stream");
+
+        let clean = String::from_utf8(output.into_inner()).unwrap();
+        assert_eq!(clean, plain);
+    }
+
+    /// Object locations correctly partition the byte stream.
+    #[test]
+    fn text_io_object_locations() {
+        let plain = "Test text.";
+        let jumbf: Vec<u8> = vec![0, 0, 0, 8, b'j', b'u', b'm', b'b'];
+        let signed = embed_manifest(plain, &jumbf);
+        let total_bytes = signed.len();
+
+        let text_io = TextIO::new("txt");
+        let mut reader = Cursor::new(signed.into_bytes());
+        let positions = text_io
+            .get_object_locations_from_stream(&mut reader)
+            .expect("get_object_locations_from_stream");
+
+        // Should have pre + cai + (possibly post)
+        assert!(positions.len() >= 2);
+
+        let cai_pos = positions
+            .iter()
+            .find(|p| p.htype == HashBlockObjectType::Cai)
+            .expect("CAI block");
+        assert!(cai_pos.length > 0);
+
+        // Positions must be contiguous from byte 0 and cover the whole text,
+        // which rules out both gaps and overlap.
+        let mut next = 0;
+        for p in &positions {
+            assert_eq!(p.offset, next);
+            next += p.length;
+        }
+        assert_eq!(next, total_bytes);
+    }
+
+    /// Plain text with no wrapper returns JumbfNotFound.
+    #[test]
+    fn text_io_no_manifest() {
+        let text_io = TextIO::new("txt");
+        let mut reader = Cursor::new("Just plain text.".as_bytes().to_vec());
+        let result = text_io.read_cai(&mut reader);
+        assert!(matches!(result.unwrap_err(), Error::JumbfNotFound));
+    }
+}
diff --git a/sdk/src/jumbf_io.rs b/sdk/src/jumbf_io.rs
index e146a8908..e2144f7a6 100644
--- a/sdk/src/jumbf_io.rs
+++ b/sdk/src/jumbf_io.rs
@@ -29,7 +29,7 @@ use crate::{
     asset_handlers::{
         bmff_io::BmffIO, c2pa_io::C2paIO, flac_io::FlacIO, gif_io::GifIO, jpeg_io::JpegIO,
         jpegxl_io::JpegXlIO, mp3_io::Mp3IO, png_io::PngIO, riff_io::RiffIO, svg_io::SvgIO,
-        tiff_io::TiffIO,
+        text_io::TextIO, tiff_io::TiffIO,
     },
     asset_io::{AssetIO, CAIRead, CAIReadWrite, CAIReader, CAIWriter, HashObjectPositions},
     error::{Error, Result},
@@ -53,6 +53,7 @@ lazy_static! {
             Box::new(Mp3IO::new("")),
             Box::new(GifIO::new("")),
             Box::new(FlacIO::new("")),
+            Box::new(TextIO::new("")),
         ];
 
         let mut handler_map = HashMap::new();
@@ -84,6 +85,7 @@ lazy_static! {
             Box::new(Mp3IO::new("")),
             Box::new(FlacIO::new("")),
             Box::new(GifIO::new("")),
+            Box::new(TextIO::new("")),
         ];
 
         let mut handler_map = HashMap::new();
@@ -408,6 +410,7 @@ pub mod tests {
             Box::new(SvgIO::new("")),
             Box::new(Mp3IO::new("")),
             Box::new(FlacIO::new("")),
+            Box::new(TextIO::new("")),
         ];
 
         // build handler map
@@ -430,6 +433,7 @@ pub mod tests {
             Box::new(SvgIO::new("")),
             Box::new(RiffIO::new("")),
             Box::new(GifIO::new("")),
+            Box::new(TextIO::new("")),
         ];
 
         // build handler map
diff --git a/sdk/src/utils/mime.rs b/sdk/src/utils/mime.rs
index 4d6be12fa..9be6f72d9 100644
--- a/sdk/src/utils/mime.rs
+++ b/sdk/src/utils/mime.rs
@@ -38,6 +38,7 @@ pub fn extension_to_mime(extension: &str) -> Option<&'static str> {
         "wav" => "audio/wav",
         "aif" | "aifc" | "aiff" => "audio/aiff",
         "ogg" => "audio/ogg",
+        "txt" => "text/plain",
         "pdf" => "application/pdf",
         "ai" => "application/postscript",
         "arw" => "image/x-sony-arw",
@@ -86,6 +87,7 @@ pub fn format_to_extension(format: &str) -> Option<&'static str> {
         "wav" | "audio/wav" | "audio/wave" | "audio.vnd.wave" => "wav",
         "aif" | "aifc" | "aiff" | "audio/aiff" => "aif",
         "ogg" | "audio/ogg" => "ogg",
+        "txt" | "text/plain" => "txt",
         "pdf" | "application/pdf" => "pdf",
         "ai" | "application/postscript" => "ai",
         "arw" | "image/x-sony-arw" => "arw",
From 40c165d1dfeffd9d3d85ab460f33b1afa3510552 Mon Sep 17 00:00:00 2001
From: Erik Svilich
Date: Fri, 15 May 2026 14:35:42 +0000
Subject: [PATCH 2/2] fix: use crates.io dependency for c2pa-text instead of git source

Git dependencies are rejected by crates.io during publish. c2pa-text
v1.1.0 is already published on crates.io, so reference it directly.
---
 sdk/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sdk/Cargo.toml b/sdk/Cargo.toml
index 2220dcb8f..41b1c942f 100644
--- a/sdk/Cargo.toml
+++ b/sdk/Cargo.toml
@@ -109,7 +109,7 @@ byteorder = { version = "1.4.3", default-features = false }
 byteordered = "0.6.0"
 chrono = { version = "0.4.42", default-features = false, features = ["serde"] }
 c2pa_cbor = "0.77.2"
-c2pa-text = { git = "https://github.com/encypherai/c2pa-text.git" }
+c2pa-text = "1.1.0"
 config = { version = "0.14.0", default-features = false, features = [
     "json",
     "toml",