From fedfb51e89c4ee5d3dfabee354b5524ed0ffdeb8 Mon Sep 17 00:00:00 2001 From: patrick Date: Thu, 14 May 2026 22:31:52 +0200 Subject: [PATCH 1/2] perf: compress embedded espeak-ng-data with rust-embed Swap include_dir for rust-embed with the 'compression' feature. The embedded files were uncompressed phoneme dictionaries (~15 MB plain text/binary); DEFLATE typically gives 4-5x on this content. Expected release binary drop: ~50 MB to ~32 MB, tarball ~22 MB to ~13 MB. UX unchanged: first piper run still extracts to ~/.config/vox/piper/espeak-ng-data and sets PIPER_ESPEAKNG_DATA_DIRECTORY. The sentinel file logic short-circuits on subsequent runs. --- Cargo.toml | 2 +- src/backend/piper.rs | 27 ++++++++++++++++++--------- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ad7d4de..2aa0dee 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ rodio = "0.20" hound = "3" qwen3-tts = { git = "https://github.com/TrevorS/qwen3-tts-rs", features = ["hub"], default-features = false } piper-rs = { git = "https://github.com/thewh1teagle/piper-rs" } -include_dir = "0.7" +rust-embed = { version = "8", features = ["compression", "interpolate-folder-path"] } kokoro-tts = { version = "0.3", optional = true } toml = "0.8" ratatui = "0.29" diff --git a/src/backend/piper.rs b/src/backend/piper.rs index 2a673ac..b031039 100644 --- a/src/backend/piper.rs +++ b/src/backend/piper.rs @@ -7,8 +7,8 @@ use std::path::PathBuf; use std::sync::{Mutex, OnceLock}; use anyhow::{Context, Result}; -use include_dir::{Dir, include_dir}; use piper_rs::Piper; +use rust_embed::RustEmbed; use super::{SpeakOptions, TtsBackend}; use crate::config; @@ -19,11 +19,14 @@ pub struct PiperBackend; /// Reloads when language changes (different ONNX model per language). static MODEL: Mutex> = Mutex::new(None); -/// espeak-ng-data embedded at build time (staged by build.rs into OUT_DIR). 
-/// Needed because the espeak-ng library statically linked into vox has a -/// hard-coded data path from the CI builder that does not exist on user -/// machines. We extract this once and point espeak-rs at the result. -static ESPEAK_DATA: Dir<'_> = include_dir!("$OUT_DIR/espeak-ng-data"); +/// espeak-ng-data embedded (and DEFLATE-compressed) at build time. Staged +/// into OUT_DIR by build.rs. Needed because the espeak-ng library statically +/// linked into vox has a hard-coded data path from the CI builder that does +/// not exist on user machines. We extract this once and point espeak-rs at +/// the result. +#[derive(RustEmbed)] +#[folder = "$OUT_DIR/espeak-ng-data"] +struct EspeakData; static ESPEAK_DATA_INIT: OnceLock> = OnceLock::new(); @@ -44,9 +47,15 @@ fn ensure_espeak_data() -> Result<()> { std::fs::remove_dir_all(&data_dir).map_err(|e| e.to_string())?; } std::fs::create_dir_all(&data_dir).map_err(|e| e.to_string())?; - ESPEAK_DATA - .extract(&data_dir) - .map_err(|e| format!("failed to extract espeak-ng-data: {e}"))?; + for path in EspeakData::iter() { + let file = EspeakData::get(&path) + .ok_or_else(|| format!("embedded espeak-ng-data entry vanished: {path}"))?; + let target = data_dir.join(path.as_ref()); + if let Some(parent) = target.parent() { + std::fs::create_dir_all(parent).map_err(|e| e.to_string())?; + } + std::fs::write(&target, file.data.as_ref()).map_err(|e| e.to_string())?; + } std::fs::File::create(&sentinel).map_err(|e| e.to_string())?; } From cec3c14aa88ac1296f6578fc0b7c27e2fe01b994 Mon Sep 17 00:00:00 2001 From: patrick Date: Thu, 14 May 2026 22:43:11 +0200 Subject: [PATCH 2/2] fix: import Embed trait for iter/get methods (rust-embed v8) --- src/backend/piper.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/piper.rs b/src/backend/piper.rs index b031039..10cd5d4 100644 --- a/src/backend/piper.rs +++ b/src/backend/piper.rs @@ -8,7 +8,7 @@ use std::sync::{Mutex, OnceLock}; use anyhow::{Context, Result}; 
use piper_rs::Piper; -use rust_embed::RustEmbed; +use rust_embed::{Embed, RustEmbed}; use super::{SpeakOptions, TtsBackend}; use crate::config;