diff --git a/Cargo.lock b/Cargo.lock index 774024b..cb24b94 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,6 +75,14 @@ dependencies = [ "testvectors", ] +[[package]] +name = "embedded-cal-software" +version = "0.1.0" +dependencies = [ + "embedded-cal", + "testvectors", +] + [[package]] name = "generic-array" version = "0.14.7" diff --git a/Cargo.toml b/Cargo.toml index 208bd50..1c0d0a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -members = ["embedded-cal", "embedded-cal-rustcrypto", "testvectors"] +members = ["embedded-cal", "embedded-cal-rustcrypto", "embedded-cal-software", "testvectors"] resolver = "3" [workspace.package] diff --git a/embedded-cal-software/Cargo.toml b/embedded-cal-software/Cargo.toml new file mode 100644 index 0000000..309eb7f --- /dev/null +++ b/embedded-cal-software/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "embedded-cal-software" +edition.workspace = true +license.workspace = true +readme.workspace = true +repository.workspace = true +keywords.workspace = true +categories.workspace = true +version.workspace = true + +[dependencies] +embedded-cal = { version = "0.1.0", path = "../embedded-cal" } + +[dev-dependencies] +testvectors.path = "../testvectors" diff --git a/embedded-cal-software/README.md b/embedded-cal-software/README.md new file mode 100644 index 0000000..4df534b --- /dev/null +++ b/embedded-cal-software/README.md @@ -0,0 +1 @@ +This is currently a stand-in for upcoming libcrux integration. diff --git a/embedded-cal-software/src/lib.rs b/embedded-cal-software/src/lib.rs new file mode 100644 index 0000000..0906eaa --- /dev/null +++ b/embedded-cal-software/src/lib.rs @@ -0,0 +1,183 @@ +//! Minimal stand-in for the libcrux based implementation and polyfills. +//! +//! Currently, this demonstrates how that layer would work on top of a hardware implementation that +//! only does the hard work of the SHA hashes and not the clerical buffering / padding. 
+#![no_std] + +pub struct Extender(B); + +const HASH_WRAPPER_MAX_BLOCKSIZE: usize = 64; + +impl embedded_cal::Cal for Extender {} + +impl embedded_cal::HashProvider for Extender { + type Algorithm = HashAlgorithm; + + type HashState = HashState; + + type HashResult = B::HashResult; + + fn init(&mut self, algorithm: Self::Algorithm) -> Self::HashState { + HashState { + block_length: match &algorithm { + HashAlgorithm::BlockWrap(_, block_length) => Some(*block_length), + HashAlgorithm::Direct(_) => None, + }, + underlying: self.0.init(match algorithm { + HashAlgorithm::BlockWrap(alg, _) | HashAlgorithm::Direct(alg) => alg, + }), + cursor: 0, + buffer: [0; _], + blocks_written: 0, + } + } + + fn update(&mut self, instance: &mut Self::HashState, mut data: &[u8]) { + let Some(block_length) = instance.block_length else { + self.0.update(&mut instance.underlying, data); + return; + }; + let block_length: usize = block_length.into(); + // Not trying to be efficient here: This is a demo implementation. + // In particular, this does *not* test sending more than a single buffer multiple in; + // that'll be tested soon enough (and easy to fix). 
+ loop { + let buffer_to_fill = &mut instance.buffer[instance.cursor..block_length]; + let fill_bytes = if data.len() > buffer_to_fill.len() { + buffer_to_fill.len() + } else { + data.len() + }; + buffer_to_fill[..fill_bytes].copy_from_slice(&data[..fill_bytes]); + data = &data[fill_bytes..]; + instance.cursor += fill_bytes; + if instance.cursor < block_length { + return; + } + self.0 + .update(&mut instance.underlying, &instance.buffer[..block_length]); + instance.cursor = 0; + instance.blocks_written += 1; + } + } + + fn finalize(&mut self, mut instance: Self::HashState) -> Self::HashResult { + if let Some(block_length) = instance.block_length { + let block_length: usize = block_length.into(); + + let mut padding = [0; _]; + let padding_size = sha256_padding( + instance.blocks_written * block_length + instance.cursor, + &mut padding, + ); + self.update(&mut instance, &padding[..padding_size]); + assert!(instance.cursor == 0, "Padding didn't pad out the message"); + }; + + self.0.finalize(instance.underlying) + } +} + +#[derive(PartialEq, Eq, Debug, Clone)] +pub enum HashAlgorithm { + BlockWrap(A, core::num::NonZeroUsize), + Direct(A), +} + +impl embedded_cal::HashAlgorithm for HashAlgorithm { + fn len(&self) -> usize { + match self { + HashAlgorithm::BlockWrap(a, _) => a.len(), + HashAlgorithm::Direct(a) => a.len(), + } + } + + #[inline] + fn from_cose_number(number: impl Into) -> Option { + let number: i128 = number.into(); + + if let Some(a) = A::from_cose_number(number) { + return Some(HashAlgorithm::Direct(a)); + } + + // This is just demo code, so we just pick any: SHA256 + if number == -16 { + return Some(HashAlgorithm::BlockWrap( + A::fullblock_nonfinishing_from_cose_number(-16)?, + 64.try_into().unwrap(), + )); + } + + None + } + + #[inline] + fn from_ni_id(number: u8) -> Option { + match number { + 1 => Self::from_cose_number(-16), + _ => None, + } + } +} + +pub struct HashState { + underlying: S, + // If this is None, we pass on; otherwise, we spool and 
apply SHA-2 finalization. (Support for + // more algorithms would require an extra disambiguator). + block_length: Option, + // I'd really love to use heapless -- can we hax that up? + cursor: usize, + buffer: [u8; HASH_WRAPPER_MAX_BLOCKSIZE], + // … or combine cursor with blocks_written and modulo it out? Doesn't matter for a demo. + blocks_written: usize, +} + +#[cfg(test)] +mod tests { + use super::*; + + mod dummy_sha256; + + #[test] + fn test_hash_algorithm_sha256_on_dummy() { + let mut cal = Extender(dummy_sha256::DummySha256); + + testvectors::test_hash_algorithm_sha256(&mut cal); + } +} + +// Remaining code is copied from https://github.com/lake-rs/embedded-cal/pull/9 + +fn sha256_padding(msg_len: usize, out: &mut [u8; 256]) -> usize { + sha2_padding(msg_len, 64, 56, 8, out) +} + +fn sha2_padding( + msg_len: usize, + block_size: usize, + length_offset: usize, + length_bytes: usize, + out: &mut [u8; 256], +) -> usize { + out[0] = 0x80; + + let rem = (msg_len + 1) % block_size; + + let zero_pad = if rem <= length_offset { + length_offset - rem + } else { + length_offset + (block_size - rem) + }; + + for b in &mut out[1..=zero_pad] { + *b = 0; + } + + let bit_len = (msg_len as u128) * 8; + let len_bytes_be = bit_len.to_be_bytes(); + + let start = 1 + zero_pad; + out[start..start + length_bytes].copy_from_slice(&len_bytes_be[(16 - length_bytes)..]); + + 1 + zero_pad + length_bytes +} diff --git a/embedded-cal-software/src/tests/dummy_sha256.rs b/embedded-cal-software/src/tests/dummy_sha256.rs new file mode 100644 index 0000000..0e83fc9 --- /dev/null +++ b/embedded-cal-software/src/tests/dummy_sha256.rs @@ -0,0 +1,112 @@ +//! Testing-only module with a manual version of SHA256 that does *not* do the padding (in order to +//! test the `fullblock_nonfinishing_from_cose_number()` algorithms) +#![allow( + non_snake_case, + non_upper_case_globals, + reason = "folling algorithm convention" +)] + +/// A minimal testable version of SHA256-but-no-blocks-or-dummy. 
+/// +/// All implementation follows the Wikipedia pseudocode. +pub struct DummySha256; + +impl embedded_cal::Cal for DummySha256 {} + +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct DummyAlgorithm; + +impl embedded_cal::HashAlgorithm for DummyAlgorithm { + fn len(&self) -> usize { + 32 + } + + #[inline] + fn fullblock_nonfinishing_from_cose_number(number: impl Into) -> Option { + if number.into() == -16 { + Some(Self) + } else { + None + } + } +} + +const k: [u32; 64] = [ + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2, +]; + +impl embedded_cal::HashProvider for DummySha256 { + type Algorithm = DummyAlgorithm; + type HashState = [u32; 8]; + type HashResult = [u8; 32]; + + fn init(&mut self, algorithm: Self::Algorithm) -> Self::HashState { + let DummyAlgorithm = algorithm; + [ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, + 0x5be0cd19, + ] + } + + fn update(&mut self, instance: &mut Self::HashState, data: &[u8]) { + assert!(data.len() % 64 == 0, "Not feeding a multiple of 512bit"); + for chunk in data.as_chunks::<64>().0 { + let mut w: [u32; 64] = [0; _]; // or uninit + for (i, chunkword) in chunk.as_chunks::<4>().0.into_iter().enumerate() { + w[i] = u32::from_be_bytes(*chunkword); + } + + for i in 
16..64 { + let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3); + let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10); + w[i] = w[i - 16] + .wrapping_add(s0) + .wrapping_add(w[i - 7]) + .wrapping_add(s1); + } + + let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h] = *instance; + + for i in 0..64 { + let S1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25); + let ch = (e & f) ^ (!e & g); + let temp1 = h + .wrapping_add(S1) + .wrapping_add(ch) + .wrapping_add(k[i]) + .wrapping_add(w[i]); + let S0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22); + let maj = (a & b) ^ (a & c) ^ (b & c); + let temp2 = S0.wrapping_add(maj); + + h = g; + g = f; + f = e; + e = d.wrapping_add(temp1); + d = c; + c = b; + b = a; + a = temp1.wrapping_add(temp2); + } + + for (big_h, add_this) in instance.iter_mut().zip([a, b, c, d, e, f, g, h]) { + *big_h = big_h.wrapping_add(add_this); + } + } + } + + fn finalize(&mut self, instance: Self::HashState) -> Self::HashResult { + let mut out = [0; _]; + for (i, word) in instance.into_iter().enumerate() { + out[4 * i..][..4].copy_from_slice(&word.to_be_bytes()); + } + out + } +} diff --git a/embedded-cal/src/lib.rs b/embedded-cal/src/lib.rs index b390bda..aa5769a 100644 --- a/embedded-cal/src/lib.rs +++ b/embedded-cal/src/lib.rs @@ -2,6 +2,7 @@ pub trait Cal: HashProvider {} +/// Interfaces for hashing a byte stream. pub trait HashProvider { type Algorithm: HashAlgorithm; /// State in which is carried between rounds of feeding data. @@ -111,6 +112,42 @@ pub trait HashAlgorithm: Sized + PartialEq + Eq + core::fmt::Debug + Clone { fn from_ni_name(name: &str) -> Option { None } + + /// Selects a **block-only** hash algorithm from its COSE number **with dummy finalization**. + /// + /// (See [`Self::from_cose_number()`] for general properties). + /// + /// The resulting algorithm is **limited** in that it only operates in "full blocks". 
+ /// Algorithms created this way may expect the caller to only feed data in chunks in integer + /// multiples of the block size, including the last one (i.e., the caller is responsible for + /// any required padding), and skip the finalization steps. + /// + /// What constitutes a block or the finalization steps is not specified in general -- it is a + /// concept introduced in this library to allow combining typical hardware implementations with a + /// common software wrapper. A block, for the purpose of the API of algorithms created through + /// this function, is: + /// + /// * For the SHA-2 family, it is the block size as defined in [RFC6234], e.g. + /// `SHA224_Message_Block_Size = 64`, and appending padding as described in Section 4 is not + /// performed. + /// + /// (Likely, most algorithms will either have a trivial block size and padding that applies, or + /// there is no point in using this interface anyway; nonetheless, the block sizes are + /// spelled out here to be precise.) + /// + /// [RFC6234]: https://www.rfc-editor.org/rfc/rfc6234 + /// + /// No equivalent `.fullblock_nonfinishing_from_ni_name()` constructor is provided, because any + /// adapter would recognize both names anyway, and there are currently no algorithms listed + /// above that have a NI but no COSE entry. + #[inline] + #[allow( + unused_variables, + reason = "Argument names are part of the documentation" + )] + fn fullblock_nonfinishing_from_cose_number(number: impl Into) -> Option { + None + } } // FIXME: Should we introduce a feature to no build those all the time?