Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdk/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ byteorder = { version = "1.4.3", default-features = false }
byteordered = "0.6.0"
chrono = { version = "0.4.42", default-features = false, features = ["serde"] }
c2pa_cbor = "0.77.2"
c2pa-text = "1.1.0"
config = { version = "0.14.0", default-features = false, features = [
"json",
"toml",
Expand Down
1 change: 1 addition & 0 deletions sdk/src/asset_handlers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pub mod mp3_io;
pub mod png_io;
pub mod riff_io;
pub mod svg_io;
pub mod text_io;
pub mod tiff_io;

#[cfg(feature = "pdf")]
Expand Down
289 changes: 289 additions & 0 deletions sdk/src/asset_handlers/text_io.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,289 @@
// Copyright 2024 Encypher Corporation. All rights reserved.
// Licensed under the Apache License, Version 2.0 or the MIT license,
// at your option.

//! C2PA Text asset handler.
//!
//! Embeds and extracts C2PA JUMBF manifests in plain text using the c2pa-text
//! crate, which encodes binary data as invisible Unicode Variation Selectors
//! per the C2PA Text Embedding specification.

use std::{fs::File, path::Path};

use c2pa_text::{embed_manifest, extract_manifest};

use crate::{
asset_io::{
rename_or_move, AssetIO, CAIRead, CAIReadWrite, CAIReader, CAIWriter,
HashBlockObjectType, HashObjectPositions,
},
error::{Error, Result},
utils::io_utils::tempfile_builder,
};

/// Asset type identifiers served by [`TextIO`]: the `txt` file extension and
/// the `text/plain` MIME type. Returned verbatim by `TextIO::supported_types`.
static SUPPORTED_TYPES: [&str; 2] = ["txt", "text/plain"];

/// Asset handler for plain-text assets.
///
/// The handler carries no state; all work is done on the streams or paths
/// passed to its trait methods.
pub struct TextIO {}

/// Read the entire stream as a UTF-8 string.
fn stream_to_string(reader: &mut dyn CAIRead) -> Result<String> {
reader.rewind()?;
let mut buf = Vec::new();
reader.read_to_end(&mut buf)?;
String::from_utf8(buf)
.map_err(|_| Error::InvalidAsset("text asset is not valid UTF-8".to_string()))
}

impl CAIReader for TextIO {
fn read_cai(&self, reader: &mut dyn CAIRead) -> Result<Vec<u8>> {
let text = stream_to_string(reader)?;

let result = extract_manifest(&text)
.map_err(|e| Error::InvalidAsset(format!("text manifest extraction failed: {e}")))?;

match result.manifest {
Some(manifest) => Ok(manifest),
None => Err(Error::JumbfNotFound),
}
}

fn read_xmp(&self, _reader: &mut dyn CAIRead) -> Option<String> {
// Text files do not carry XMP.
None
}
}

impl CAIWriter for TextIO {
fn write_cai(
&self,
input_stream: &mut dyn CAIRead,
output_stream: &mut dyn CAIReadWrite,
store_bytes: &[u8],
) -> Result<()> {
let text = stream_to_string(input_stream)?;

// Strip any existing wrapper to get clean text.
let clean = match extract_manifest(&text) {
Ok(r) => r.clean_text,
Err(_) => text,
};

let signed = embed_manifest(&clean, store_bytes);

output_stream.rewind()?;
output_stream.write_all(signed.as_bytes())?;
Ok(())
}

fn get_object_locations_from_stream(
&self,
input_stream: &mut dyn CAIRead,
) -> Result<Vec<HashObjectPositions>> {
let text = stream_to_string(input_stream)?;

let result = extract_manifest(&text)
.map_err(|e| Error::InvalidAsset(format!("text manifest extraction failed: {e}")))?;

let (offset, length) = match (result.offset, result.length) {
(Some(o), Some(l)) => (o, l),
_ => return Err(Error::JumbfNotFound),
};

let total = text.len();

let mut positions = Vec::new();

// Pre-wrapper content
positions.push(HashObjectPositions {
offset: 0,
length: offset,
htype: HashBlockObjectType::Other,
});

// C2PA wrapper (the variation-selector encoded block)
positions.push(HashObjectPositions {
offset,
length,
htype: HashBlockObjectType::Cai,
});

// Post-wrapper content (if any)
let end = offset + length;
if end < total {
positions.push(HashObjectPositions {
offset: end,
length: total - end,
htype: HashBlockObjectType::Other,
});
}

Ok(positions)
}

fn remove_cai_store_from_stream(
&self,
input_stream: &mut dyn CAIRead,
output_stream: &mut dyn CAIReadWrite,
) -> Result<()> {
let text = stream_to_string(input_stream)?;

let clean = match extract_manifest(&text) {
Ok(r) => r.clean_text,
Err(_) => text,
};

output_stream.rewind()?;
output_stream.write_all(clean.as_bytes())?;
Ok(())
}
}

impl AssetIO for TextIO {
fn new(_asset_type: &str) -> Self
where
Self: Sized,
{
TextIO {}
}

fn get_handler(&self, asset_type: &str) -> Box<dyn AssetIO> {
Box::new(TextIO::new(asset_type))
}

fn get_reader(&self) -> &dyn CAIReader {
self
}

fn get_writer(&self, asset_type: &str) -> Option<Box<dyn CAIWriter>> {
Some(Box::new(TextIO::new(asset_type)))
}

fn read_cai_store(&self, asset_path: &Path) -> Result<Vec<u8>> {
let mut f = File::open(asset_path)?;
self.read_cai(&mut f)
}

fn save_cai_store(&self, asset_path: &Path, store_bytes: &[u8]) -> Result<()> {
let mut input_stream = File::open(asset_path).map_err(Error::IoError)?;

let mut temp_file = tempfile_builder("c2pa_temp")?;

self.write_cai(&mut input_stream, &mut temp_file, store_bytes)?;

rename_or_move(temp_file, asset_path)
}

fn get_object_locations(&self, asset_path: &Path) -> Result<Vec<HashObjectPositions>> {
let mut input_stream = File::open(asset_path).map_err(|_| Error::EmbeddingError)?;
self.get_object_locations_from_stream(&mut input_stream)
}

fn remove_cai_store(&self, asset_path: &Path) -> Result<()> {
let mut input_file = File::open(asset_path)?;

let mut temp_file = tempfile_builder("c2pa_temp")?;

self.remove_cai_store_from_stream(&mut input_file, &mut temp_file)?;

rename_or_move(temp_file, asset_path)
}

fn supported_types(&self) -> &[&str] {
&SUPPORTED_TYPES
}
}

#[cfg(test)]
#[cfg(feature = "file_io")]
pub mod tests {
    //! Stream-level tests for the text asset handler.
    #![allow(clippy::expect_used)]
    #![allow(clippy::unwrap_used)]

    use std::io::Cursor;

    use super::*;

    /// Round-trip: write manifest into text, then read it back.
    #[test]
    fn text_io_stream_roundtrip() {
        let plain = "Hello, C2PA world!";
        let jumbf: Vec<u8> = vec![0, 0, 0, 8, b'j', b'u', b'm', b'b'];

        // Build a signed text string via c2pa-text directly.
        let signed = embed_manifest(plain, &jumbf);

        // Read back through the handler.
        let text_io = TextIO::new("txt");
        let mut reader = Cursor::new(signed.clone().into_bytes());
        let extracted = text_io.read_cai(&mut reader).expect("read_cai");
        assert_eq!(extracted, jumbf);

        // Write through the handler (replaces old manifest).
        let new_jumbf: Vec<u8> = vec![0, 0, 0, 8, b'j', b'u', b'm', b'b', 0xAA];
        let mut input = Cursor::new(signed.into_bytes());
        let mut output = Cursor::new(Vec::new());
        text_io
            .write_cai(&mut input, &mut output, &new_jumbf)
            .expect("write_cai");

        // Read new manifest.
        output.set_position(0);
        let extracted2 = text_io
            .read_cai(&mut output)
            .expect("read_cai after write");
        assert_eq!(extracted2, new_jumbf);
    }

    /// Removing the CAI store produces clean text.
    #[test]
    fn text_io_remove_store() {
        let plain = "Article body text.";
        let jumbf: Vec<u8> = vec![0, 0, 0, 8, b'j', b'u', b'm', b'b'];
        let signed = embed_manifest(plain, &jumbf);

        let text_io = TextIO::new("txt");
        let mut input = Cursor::new(signed.into_bytes());
        let mut output = Cursor::new(Vec::new());
        text_io
            .remove_cai_store_from_stream(&mut input, &mut output)
            .expect("remove_cai_store_from_stream");

        let clean = String::from_utf8(output.into_inner()).unwrap();
        assert_eq!(clean, plain);
    }

    /// Object locations correctly partition the byte stream.
    #[test]
    fn text_io_object_locations() {
        let plain = "Test text.";
        let jumbf: Vec<u8> = vec![0, 0, 0, 8, b'j', b'u', b'm', b'b'];
        let signed = embed_manifest(plain, &jumbf);
        let total_bytes = signed.len();

        let text_io = TextIO::new("txt");
        let mut reader = Cursor::new(signed.into_bytes());
        let positions = text_io
            .get_object_locations_from_stream(&mut reader)
            .expect("get_object_locations_from_stream");

        // Should have pre + cai + (possibly post)
        assert!(positions.len() >= 2);

        // Exactly one range may be marked as the manifest wrapper.
        let cai_count = positions
            .iter()
            .filter(|p| p.htype == HashBlockObjectType::Cai)
            .count();
        assert_eq!(cai_count, 1);

        let cai_pos = positions
            .iter()
            .find(|p| p.htype == HashBlockObjectType::Cai)
            .expect("CAI block");
        assert!(cai_pos.length > 0);

        // The ranges must tile the byte stream: each one starts exactly where
        // the previous one ended (no gap, no overlap). Summing lengths alone
        // would not catch overlapping or shifted ranges.
        let mut cursor = 0usize;
        for p in &positions {
            assert_eq!(p.offset, cursor, "gap or overlap at offset {}", p.offset);
            cursor += p.length;
        }

        // ...and together they must end at the last byte of the asset.
        assert_eq!(cursor, total_bytes);
    }

    /// Plain text with no wrapper returns JumbfNotFound.
    #[test]
    fn text_io_no_manifest() {
        let text_io = TextIO::new("txt");
        let mut reader = Cursor::new("Just plain text.".as_bytes().to_vec());
        let result = text_io.read_cai(&mut reader);
        assert!(matches!(result.unwrap_err(), Error::JumbfNotFound));
    }
}
6 changes: 5 additions & 1 deletion sdk/src/jumbf_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use crate::{
asset_handlers::{
bmff_io::BmffIO, c2pa_io::C2paIO, flac_io::FlacIO, gif_io::GifIO, jpeg_io::JpegIO,
jpegxl_io::JpegXlIO, mp3_io::Mp3IO, png_io::PngIO, riff_io::RiffIO, svg_io::SvgIO,
tiff_io::TiffIO,
text_io::TextIO, tiff_io::TiffIO,
},
asset_io::{AssetIO, CAIRead, CAIReadWrite, CAIReader, CAIWriter, HashObjectPositions},
error::{Error, Result},
Expand All @@ -53,6 +53,7 @@ lazy_static! {
Box::new(Mp3IO::new("")),
Box::new(GifIO::new("")),
Box::new(FlacIO::new("")),
Box::new(TextIO::new("")),
];

let mut handler_map = HashMap::new();
Expand Down Expand Up @@ -84,6 +85,7 @@ lazy_static! {
Box::new(Mp3IO::new("")),
Box::new(FlacIO::new("")),
Box::new(GifIO::new("")),
Box::new(TextIO::new("")),
];
let mut handler_map = HashMap::new();

Expand Down Expand Up @@ -408,6 +410,7 @@ pub mod tests {
Box::new(SvgIO::new("")),
Box::new(Mp3IO::new("")),
Box::new(FlacIO::new("")),
Box::new(TextIO::new("")),
];

// build handler map
Expand All @@ -430,6 +433,7 @@ pub mod tests {
Box::new(SvgIO::new("")),
Box::new(RiffIO::new("")),
Box::new(GifIO::new("")),
Box::new(TextIO::new("")),
];

// build handler map
Expand Down
2 changes: 2 additions & 0 deletions sdk/src/utils/mime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pub fn extension_to_mime(extension: &str) -> Option<&'static str> {
"wav" => "audio/wav",
"aif" | "aifc" | "aiff" => "audio/aiff",
"ogg" => "audio/ogg",
"txt" => "text/plain",
"pdf" => "application/pdf",
"ai" => "application/postscript",
"arw" => "image/x-sony-arw",
Expand Down Expand Up @@ -86,6 +87,7 @@ pub fn format_to_extension(format: &str) -> Option<&'static str> {
"wav" | "audio/wav" | "audio/wave" | "audio.vnd.wave" => "wav",
"aif" | "aifc" | "aiff" | "audio/aiff" => "aif",
"ogg" | "audio/ogg" => "ogg",
"txt" | "text/plain" => "txt",
"pdf" | "application/pdf" => "pdf",
"ai" | "application/postscript" => "ai",
"arw" | "image/x-sony-arw" => "arw",
Expand Down