diff --git a/Cargo.lock b/Cargo.lock
index 774024b..cb24b94 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -75,6 +75,14 @@ dependencies = [
  "testvectors",
 ]
 
+[[package]]
+name = "embedded-cal-software"
+version = "0.1.0"
+dependencies = [
+ "embedded-cal",
+ "testvectors",
+]
+
 [[package]]
 name = "generic-array"
 version = "0.14.7"
diff --git a/Cargo.toml b/Cargo.toml
index 208bd50..1c0d0a5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,5 @@
 [workspace]
-members = ["embedded-cal", "embedded-cal-rustcrypto", "testvectors"]
+members = ["embedded-cal", "embedded-cal-rustcrypto", "embedded-cal-software", "testvectors"]
 resolver = "3"
 
 [workspace.package]
diff --git a/embedded-cal-software/Cargo.toml b/embedded-cal-software/Cargo.toml
new file mode 100644
index 0000000..309eb7f
--- /dev/null
+++ b/embedded-cal-software/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "embedded-cal-software"
+edition.workspace = true
+license.workspace = true
+readme.workspace = true
+repository.workspace = true
+keywords.workspace = true
+categories.workspace = true
+version.workspace = true
+
+[dependencies]
+embedded-cal = { version = "0.1.0", path = "../embedded-cal" }
+
+[dev-dependencies]
+testvectors.path = "../testvectors"
diff --git a/embedded-cal-software/README.md b/embedded-cal-software/README.md
new file mode 100644
index 0000000..4df534b
--- /dev/null
+++ b/embedded-cal-software/README.md
@@ -0,0 +1 @@
+This is currently a stand-in for upcoming libcrux integration.
diff --git a/embedded-cal-software/src/lib.rs b/embedded-cal-software/src/lib.rs
new file mode 100644
index 0000000..0906eaa
--- /dev/null
+++ b/embedded-cal-software/src/lib.rs
@@ -0,0 +1,183 @@
+//! Minimal stand-in for the libcrux based implementation and polyfills.
+//!
+//! Currently, this demonstrates how that layer would work on top of a hardware implementation that
+//! only does the hard work of the SHA hashes and not the clerical buffering / padding.
+#![no_std]
+
+pub struct Extender<B: embedded_cal::Cal>(B); // wraps backend `B`, see the module documentation
+
+const HASH_WRAPPER_MAX_BLOCKSIZE: usize = 64; // capacity of the spooling buffer in `HashState`
+
+impl<B: embedded_cal::Cal> embedded_cal::Cal for Extender<B> {}
+
+impl<B: embedded_cal::Cal> embedded_cal::HashProvider for Extender<B> {
+    type Algorithm = HashAlgorithm<B::Algorithm>;
+
+    type HashState = HashState<B::HashState>;
+
+    type HashResult = B::HashResult;
+
+    /// Starts a hash on the wrapped backend; `BlockWrap` algorithms get their input spooled.
+    fn init(&mut self, algorithm: Self::Algorithm) -> Self::HashState {
+        let (inner_algorithm, block_length) = match algorithm {
+            HashAlgorithm::BlockWrap(alg, block_length) => (alg, Some(block_length)),
+            HashAlgorithm::Direct(alg) => (alg, None),
+        };
+        HashState {
+            block_length,
+            underlying: self.0.init(inner_algorithm),
+            cursor: 0,
+            buffer: [0; _],
+            blocks_written: 0,
+        }
+    }
+
+    /// Feeds `data` into the hash.
+    ///
+    /// Direct algorithms pass `data` through unchanged; block-wrapped ones buffer it and only
+    /// ever hand complete blocks to the backend.
+    fn update(&mut self, instance: &mut Self::HashState, mut data: &[u8]) {
+        let Some(block_length) = instance.block_length else {
+            self.0.update(&mut instance.underlying, data);
+            return;
+        };
+        let block_length = block_length.get();
+        // Not trying to be efficient here: This is a demo implementation that copies every byte
+        // through the buffer, even when `data` already holds complete blocks.
+        loop {
+            let free = &mut instance.buffer[instance.cursor..block_length];
+            let take = data.len().min(free.len());
+            let (head, rest) = data.split_at(take);
+            free[..take].copy_from_slice(head);
+            data = rest;
+            instance.cursor += take;
+            if instance.cursor < block_length {
+                // Input exhausted before the block filled up; keep it spooled for later.
+                return;
+            }
+            self.0
+                .update(&mut instance.underlying, &instance.buffer[..block_length]);
+            instance.cursor = 0;
+            instance.blocks_written += 1;
+        }
+    }
+
+    /// Completes the hash.
+    ///
+    /// Block-wrapped algorithms first get the SHA-256 padding appended through `update`.
+    fn finalize(&mut self, mut instance: Self::HashState) -> Self::HashResult {
+        if let Some(block_length) = instance.block_length {
+            let total_length = instance.blocks_written * block_length.get() + instance.cursor;
+            let mut padding = [0; _];
+            let padding_size = sha256_padding(total_length, &mut padding);
+            self.update(&mut instance, &padding[..padding_size]);
+            assert!(instance.cursor == 0, "Padding didn't pad out the message");
+        }
+
+        self.0.finalize(instance.underlying)
+    }
+}
+
+#[derive(PartialEq, Eq, Debug, Clone)]
+pub enum HashAlgorithm<A: embedded_cal::HashAlgorithm> {
+    BlockWrap(A, core::num::NonZeroUsize), // block-only backend algorithm and its block length
+    Direct(A), // backend algorithm that is passed through as-is
+}
+
+impl<A: embedded_cal::HashAlgorithm> embedded_cal::HashAlgorithm for HashAlgorithm<A> {
+    /// Delegates to the wrapped algorithm's `len()`, whichever variant holds it.
+    fn len(&self) -> usize {
+        let (Self::BlockWrap(inner, _) | Self::Direct(inner)) = self;
+        inner.len()
+    }
+
+    /// Prefers the backend's complete implementation of `number`.
+    ///
+    /// Failing that, COSE number -16 is built from the backend's block-only variant, with
+    /// this crate taking over buffering and padding.
+    #[inline]
+    fn from_cose_number(number: impl Into<i128>) -> Option<Self> {
+        const SHA256_COSE_NUMBER: i128 = -16;
+        const SHA256_BLOCK_LENGTH: usize = 64;
+
+        let number: i128 = number.into();
+        if let Some(direct) = A::from_cose_number(number) {
+            return Some(Self::Direct(direct));
+        }
+        // This is just demo code, so only a single algorithm is wrapped: SHA256
+        if number != SHA256_COSE_NUMBER {
+            return None;
+        }
+        let blockwise = A::fullblock_nonfinishing_from_cose_number(SHA256_COSE_NUMBER)?;
+        Some(Self::BlockWrap(blockwise, SHA256_BLOCK_LENGTH.try_into().unwrap()))
+    }
+
+    /// Maps NI ID 1 onto COSE number -16, the only ID handled here.
+    #[inline]
+    fn from_ni_id(number: u8) -> Option<Self> {
+        let cose_number = (number == 1).then_some(-16)?;
+        Self::from_cose_number(cose_number)
+    }
+}
+
+pub struct HashState<S> {
+    underlying: S, // state of the wrapped backend's hash
+    // If this is None, we pass on; otherwise, we spool and apply SHA-2 finalization. (Support for
+    // more algorithms would require an extra disambiguator).
+    block_length: Option<core::num::NonZeroUsize>,
+    // I'd really love to use heapless -- can we hax that up?
+    cursor: usize, // number of bytes currently spooled at the start of `buffer`
+    buffer: [u8; HASH_WRAPPER_MAX_BLOCKSIZE],
+    // … or combine cursor with blocks_written and modulo it out? Doesn't matter for a demo.
+    blocks_written: usize, // count of full blocks already handed to the backend
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    mod dummy_sha256;
+
+    #[test]
+    fn test_hash_algorithm_sha256_on_dummy() {
+        let mut cal = Extender(dummy_sha256::DummySha256);
+        // The dummy backend only accepts whole blocks, so the Extender has to buffer and pad.
+        testvectors::test_hash_algorithm_sha256(&mut cal);
+    }
+}
+
+// Remaining code is copied from https://github.com/lake-rs/embedded-cal/pull/9
+
+fn sha256_padding(msg_len: usize, out: &mut [u8; 256]) -> usize {
+    sha2_padding(msg_len, 64, 56, 8, out) // block size 64, 8-byte length field at offset 56
+}
+
+/// Writes SHA-2 padding for a `msg_len` byte message into `out` and returns the padding length.
+///
+/// The padding is a `0x80` byte, zeros up to `length_offset` within a `block_size` byte block,
+/// and the message length in bits as `length_bytes` big-endian bytes.
+fn sha2_padding(
+    msg_len: usize,
+    block_size: usize,
+    length_offset: usize,
+    length_bytes: usize,
+    out: &mut [u8; 256],
+) -> usize {
+    // Offset within the block right after the mandatory 0x80 marker byte. Reducing before
+    // adding keeps this from overflowing for message lengths near `usize::MAX`.
+    let rem = (msg_len % block_size + 1) % block_size;
+    // Zeros needed to reach the length field, spilling into another block if it is too late.
+    let zero_pad = if rem <= length_offset {
+        length_offset - rem
+    } else {
+        length_offset + (block_size - rem)
+    };
+    let start = 1 + zero_pad;
+    let end = start + length_bytes;
+    out[0] = 0x80;
+    out[1..start].fill(0);
+    // The length field holds the message length in bits, cut to its low `length_bytes` bytes.
+    let bit_len = (msg_len as u128) * 8;
+    out[start..end].copy_from_slice(&bit_len.to_be_bytes()[16 - length_bytes..]);
+    end
+}
diff --git a/embedded-cal-software/src/tests/dummy_sha256.rs b/embedded-cal-software/src/tests/dummy_sha256.rs
new file mode 100644
index 0000000..0e83fc9
--- /dev/null
+++ b/embedded-cal-software/src/tests/dummy_sha256.rs
@@ -0,0 +1,112 @@
+//! Testing-only module with a manual version of SHA256 that does *not* do the padding (in order to
+//! test the `fullblock_nonfinishing_from_cose_number()` algorithms)
+#![allow(
+    non_snake_case,
+    non_upper_case_globals,
+    reason = "following algorithm convention"
+)]
+
+/// A minimal testable version of SHA-256 that only processes full blocks and does no padding.
+///
+/// All implementation follows the Wikipedia pseudocode.
+pub struct DummySha256;
+
+impl embedded_cal::Cal for DummySha256 {}
+
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub struct DummyAlgorithm; // unit marker: the dummy provider has exactly one algorithm
+
+impl embedded_cal::HashAlgorithm for DummyAlgorithm {
+    /// Digest length in bytes, matching the `[u8; 32]` result of the dummy provider.
+    fn len(&self) -> usize {
+        32
+    }
+
+    /// Accepts only COSE number -16.
+    ///
+    /// This is the block-only form: the provider's `update` insists on whole 64-byte blocks.
+    #[inline]
+    fn fullblock_nonfinishing_from_cose_number(number: impl Into<i128>) -> Option<Self> {
+        (number.into() == -16).then_some(Self)
+    }
+}
+
+const k: [u32; 64] = [ // SHA-256 round constants, indexed by round number in `update`
+    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+];
+
+impl embedded_cal::HashProvider for DummySha256 {
+    type Algorithm = DummyAlgorithm;
+    type HashState = [u32; 8];
+    type HashResult = [u8; 32];
+    /// Returns the eight initial hash words; `algorithm` carries no further information.
+    fn init(&mut self, algorithm: Self::Algorithm) -> Self::HashState {
+        let DummyAlgorithm = algorithm;
+        [
+            0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab,
+            0x5be0cd19,
+        ]
+    }
+    /// Compresses `data` into the state, 64 bytes at a time; panics on a partial block.
+    fn update(&mut self, instance: &mut Self::HashState, data: &[u8]) {
+        assert!(data.len() % 64 == 0, "Not feeding a multiple of 512bit");
+        for chunk in data.as_chunks::<64>().0 {
+            let mut w: [u32; 64] = [0; _]; // or uninit
+            for (i, chunkword) in chunk.as_chunks::<4>().0.into_iter().enumerate() {
+                w[i] = u32::from_be_bytes(*chunkword);
+            }
+            // Extend the 16 big-endian input words into the 64-entry message schedule.
+            for i in 16..64 {
+                let s0 = w[i - 15].rotate_right(7) ^ w[i - 15].rotate_right(18) ^ (w[i - 15] >> 3);
+                let s1 = w[i - 2].rotate_right(17) ^ w[i - 2].rotate_right(19) ^ (w[i - 2] >> 10);
+                w[i] = w[i - 16]
+                    .wrapping_add(s0)
+                    .wrapping_add(w[i - 7])
+                    .wrapping_add(s1);
+            }
+            // The working variables start out as a copy of the current state.
+            let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h] = *instance;
+            // One compression round per schedule word.
+            for i in 0..64 {
+                let S1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
+                let ch = (e & f) ^ (!e & g);
+                let temp1 = h
+                    .wrapping_add(S1)
+                    .wrapping_add(ch)
+                    .wrapping_add(k[i])
+                    .wrapping_add(w[i]);
+                let S0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
+                let maj = (a & b) ^ (a & c) ^ (b & c);
+                let temp2 = S0.wrapping_add(maj);
+
+                h = g;
+                g = f;
+                f = e;
+                e = d.wrapping_add(temp1);
+                d = c;
+                c = b;
+                b = a;
+                a = temp1.wrapping_add(temp2);
+            }
+            // Add the compressed chunk onto the running state (wrapping, word by word).
+            for (big_h, add_this) in instance.iter_mut().zip([a, b, c, d, e, f, g, h]) {
+                *big_h = big_h.wrapping_add(add_this);
+            }
+        }
+    }
+    /// Serializes the state words as big-endian bytes; no padding or length block is added.
+    fn finalize(&mut self, instance: Self::HashState) -> Self::HashResult {
+        let mut out = [0; _];
+        for (i, word) in instance.into_iter().enumerate() {
+            out[4 * i..][..4].copy_from_slice(&word.to_be_bytes());
+        }
+        out
+    }
+}
diff --git a/embedded-cal/src/lib.rs b/embedded-cal/src/lib.rs
index b390bda..aa5769a 100644
--- a/embedded-cal/src/lib.rs
+++ b/embedded-cal/src/lib.rs
@@ -2,6 +2,7 @@
 
 pub trait Cal: HashProvider {}
 
+/// Interfaces for hashing a byte stream.
 pub trait HashProvider {
     type Algorithm: HashAlgorithm;
     /// State in which is carried between rounds of feeding data.
@@ -111,6 +112,42 @@ pub trait HashAlgorithm: Sized + PartialEq + Eq + core::fmt::Debug + Clone {
     fn from_ni_name(name: &str) -> Option<Self> {
         None
     }
+
+    /// Selects a **block-only** hash algorithm from its COSE number **with dummy finalization**.
+    ///
+    /// (See [`Self::from_cose_number()`] for general properties).
+    ///
+    /// The resulting algorithm is **limited** in that it only operates in "full blocks".
+    /// Algorithms created this way may expect the caller to only feed data in chunks that are
+    /// integer multiples of the block size, including the last one (i.e., the caller is
+    /// responsible for any required padding), and they skip the finalization steps.
+    ///
+    /// What constitutes a block or the finalization steps is not specified in general -- it is a
+    /// concept introduced in this library to allow combining typical hardware implementations
+    /// with a common software wrapper. A block, for the purpose of the API of algorithms created
+    /// through this function, is:
+    ///
+    /// * For the SHA-2 family, it is the block size as defined in [RFC6234], e.g.
+    ///   `SHA224_Message_Block_Size = 64`, and appending padding as described in Section 4 is not
+    ///   performed.
+    ///
+    /// (Likely, most algorithms will either have a trivial block size and padding that applies, or
+    /// there will be no point in using this interface anyway; nonetheless, the block sizes are
+    /// spelled out here to be precise.)
+    ///
+    /// [RFC6234]: https://www.rfc-editor.org/rfc/rfc6234
+    ///
+    /// No equivalent `.fullblock_nonfinishing_from_ni_name()` constructor is provided, because any
+    /// adapter would recognize both names anyway, and there are currently no algorithms listed
+    /// above that have a NI but no COSE entry.
+    #[inline]
+    #[allow(
+        unused_variables,
+        reason = "Argument names are part of the documentation"
+    )]
+    fn fullblock_nonfinishing_from_cose_number(number: impl Into<i128>) -> Option<Self> {
+        None
+    }
 }
 
 // FIXME: Should we introduce a feature to no build those all the time?