diff --git a/src-tauri/Cargo.lock b/src-tauri/Cargo.lock index d4427affb..04b2c1a3c 100644 --- a/src-tauri/Cargo.lock +++ b/src-tauri/Cargo.lock @@ -7086,8 +7086,6 @@ dependencies = [ [[package]] name = "transcribe-rs" version = "0.2.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a15e55d3bd4f74a474b7ff2c87ab416130223db490102c5ad32c6e2227686b0" dependencies = [ "derive_builder", "env_logger", diff --git a/src-tauri/Cargo.toml b/src-tauri/Cargo.toml index 39bbcf537..1cc9c0356 100644 --- a/src-tauri/Cargo.toml +++ b/src-tauri/Cargo.toml @@ -106,3 +106,6 @@ lto = true codegen-units = 1 strip = true panic = "abort" + +[patch.crates-io] +transcribe-rs = { path = "../../transcribe-rs" } diff --git a/src-tauri/src/commands/transcription.rs b/src-tauri/src/commands/transcription.rs index 6cfa2e6a4..ea8dc89d3 100644 --- a/src-tauri/src/commands/transcription.rs +++ b/src-tauri/src/commands/transcription.rs @@ -38,3 +38,11 @@ pub fn unload_model_manually( .unload_model() .map_err(|e| format!("Failed to unload model: {}", e)) } + +#[tauri::command] +#[specta::specta] +pub fn retry_whisper_gpu(transcription_manager: State) -> Result<(), String> { + transcription_manager + .retry_whisper_gpu() + .map_err(|e| format!("Failed to return Whisper to GPU: {}", e)) +} diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index c45adf9fa..cba559251 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -16,6 +16,7 @@ mod signal_handle; mod tray; mod tray_i18n; mod utils; +pub mod whisper_worker; use specta_typescript::{BigIntExportBehavior, Typescript}; use tauri_specta::{collect_commands, Builder}; @@ -255,6 +256,7 @@ pub fn run() { shortcut::change_clipboard_handling_setting, shortcut::change_post_process_enabled_setting, shortcut::change_experimental_enabled_setting, + shortcut::change_whisper_compute_mode_setting, shortcut::change_post_process_base_url_setting, shortcut::change_post_process_api_key_setting, 
shortcut::change_post_process_model_setting, @@ -315,6 +317,7 @@ pub fn run() { commands::transcription::set_model_unload_timeout, commands::transcription::get_model_load_status, commands::transcription::unload_model_manually, + commands::transcription::retry_whisper_gpu, commands::history::get_history_entries, commands::history::toggle_history_entry_saved, commands::history::get_audio_file_path, diff --git a/src-tauri/src/main.rs b/src-tauri/src/main.rs index 9da82776d..844b9bdf3 100644 --- a/src-tauri/src/main.rs +++ b/src-tauri/src/main.rs @@ -2,6 +2,11 @@ #![cfg_attr(not(debug_assertions), windows_subsystem = "windows")] fn main() { + if std::env::args().any(|arg| arg == "--whisper-worker") { + let _ = handy_app_lib::whisper_worker::run_worker_process(); + return; + } + #[cfg(target_os = "linux")] { if std::path::Path::new("/dev/dri").exists() diff --git a/src-tauri/src/managers/transcription.rs b/src-tauri/src/managers/transcription.rs index 4287533dd..24fdc729d 100644 --- a/src-tauri/src/managers/transcription.rs +++ b/src-tauri/src/managers/transcription.rs @@ -1,6 +1,9 @@ use crate::audio_toolkit::{apply_custom_words, filter_transcription_output}; use crate::managers::model::{EngineType, ModelManager}; -use crate::settings::{get_settings, ModelUnloadTimeout}; +use crate::settings::{get_settings, ModelUnloadTimeout, WhisperComputeMode}; +use crate::whisper_worker::{ + WhisperRuntimeMode, WhisperWorkerClient, WhisperWorkerInferenceParams, +}; use anyhow::Result; use log::{debug, error, info, warn}; use serde::Serialize; @@ -15,7 +18,6 @@ use transcribe_rs::{ parakeet::{ ParakeetEngine, ParakeetInferenceParams, ParakeetModelParams, TimestampGranularity, }, - whisper::{WhisperEngine, WhisperInferenceParams}, }, TranscriptionEngine, }; @@ -28,8 +30,16 @@ pub struct ModelStateEvent { pub error: Option, } +#[derive(Clone, Debug, Serialize)] +pub struct WhisperComputeFallbackEvent { + pub from: String, + pub to: String, + pub reason: String, + pub can_retry_gpu: 
bool, +} + enum LoadedEngine { - Whisper(WhisperEngine), + Whisper(WhisperWorkerClient), Parakeet(ParakeetEngine), Moonshine(MoonshineEngine), } @@ -142,7 +152,10 @@ impl TranscriptionManager { let mut engine = self.engine.lock().unwrap(); if let Some(ref mut loaded_engine) = *engine { match loaded_engine { - LoadedEngine::Whisper(ref mut e) => e.unload_model(), + LoadedEngine::Whisper(ref mut e) => { + let _ = e.unload(); + e.terminate(); + } LoadedEngine::Parakeet(ref mut e) => e.unload_model(), LoadedEngine::Moonshine(ref mut e) => e.unload_model(), } @@ -225,21 +238,35 @@ impl TranscriptionManager { // Create appropriate engine based on model type let loaded_engine = match model_info.engine_type { EngineType::Whisper => { - let mut engine = WhisperEngine::new(); - engine.load_model(&model_path).map_err(|e| { - let error_msg = format!("Failed to load whisper model {}: {}", model_id, e); - let _ = self.app_handle.emit( - "model-state-changed", - ModelStateEvent { - event_type: "loading_failed".to_string(), - model_id: Some(model_id.to_string()), - model_name: Some(model_info.name.clone()), - error: Some(error_msg.clone()), - }, - ); - anyhow::anyhow!(error_msg) - })?; - LoadedEngine::Whisper(engine) + let settings = get_settings(&self.app_handle); + let preferred_mode = + whisper_runtime_mode_from_setting(settings.whisper_compute_mode); + let worker = WhisperWorkerClient::spawn_for_model(&model_path, preferred_mode) + .or_else(|first_err| { + if settings.whisper_compute_mode == WhisperComputeMode::Auto { + WhisperWorkerClient::spawn_for_model( + &model_path, + WhisperRuntimeMode::Cpu, + ) + .map_err(|cpu_err| anyhow::anyhow!("{}; {}", first_err, cpu_err)) + } else { + Err(first_err) + } + }) + .map_err(|e| { + let error_msg = format!("Failed to load whisper model {}: {}", model_id, e); + let _ = self.app_handle.emit( + "model-state-changed", + ModelStateEvent { + event_type: "loading_failed".to_string(), + model_id: Some(model_id.to_string()), + model_name: 
Some(model_info.name.clone()), + error: Some(error_msg.clone()), + }, + ); + anyhow::anyhow!(error_msg) + })?; + LoadedEngine::Whisper(worker) } EngineType::Parakeet => { let mut engine = ParakeetEngine::new(); @@ -341,6 +368,31 @@ impl TranscriptionManager { current_model.clone() } + pub fn retry_whisper_gpu(&self) -> Result<()> { + let model_id = self + .get_current_model() + .ok_or_else(|| anyhow::anyhow!("No active model"))?; + let model_info = self + .model_manager + .get_model_info(&model_id) + .ok_or_else(|| anyhow::anyhow!("Model not found: {}", model_id))?; + if !matches!(model_info.engine_type, EngineType::Whisper) { + return Err(anyhow::anyhow!("Current model is not Whisper")); + } + + let model_path = self.model_manager.get_model_path(&model_id)?; + let gpu_worker = + WhisperWorkerClient::spawn_for_model(&model_path, WhisperRuntimeMode::Gpu)?; + let mut engine_guard = self.engine.lock().unwrap(); + if let Some(LoadedEngine::Whisper(existing)) = engine_guard.as_mut() { + existing.terminate(); + *existing = gpu_worker; + Ok(()) + } else { + Err(anyhow::anyhow!("Whisper engine is not loaded")) + } + } + pub fn transcribe(&self, audio: Vec) -> Result { // Update last activity timestamp self.last_activity.store( @@ -389,8 +441,6 @@ impl TranscriptionManager { match engine { LoadedEngine::Whisper(whisper_engine) => { - // Normalize language code for Whisper - // Convert zh-Hans and zh-Hant to zh since Whisper uses ISO 639-1 codes let whisper_language = if settings.selected_language == "auto" { None } else { @@ -404,15 +454,70 @@ impl TranscriptionManager { Some(normalized) }; - let params = WhisperInferenceParams { + let params = WhisperWorkerInferenceParams { language: whisper_language, translate: settings.translate_to_english, - ..Default::default() }; - whisper_engine - .transcribe_samples(audio, Some(params)) - .map_err(|e| anyhow::anyhow!("Whisper transcription failed: {}", e))? 
+ match whisper_engine.transcribe(audio.clone(), params.clone()) { + Ok(text) => transcribe_rs::TranscriptionResult { + text, + segments: None, + }, + Err(err) => { + let can_fallback = matches!( + settings.whisper_compute_mode, + WhisperComputeMode::Auto | WhisperComputeMode::Gpu + ) && whisper_engine.runtime_mode() + == WhisperRuntimeMode::Gpu; + + if can_fallback { + let current_model_id = self + .get_current_model() + .ok_or_else(|| anyhow::anyhow!("No active whisper model"))?; + let model_path = + self.model_manager.get_model_path(¤t_model_id)?; + whisper_engine.terminate(); + let mut cpu_worker = WhisperWorkerClient::spawn_for_model( + &model_path, + WhisperRuntimeMode::Cpu, + ) + .map_err(|e| { + anyhow::anyhow!( + "Whisper GPU failed and CPU fallback failed: {}; {}", + err, + e + ) + })?; + let retried_text = + cpu_worker.transcribe(audio, params).map_err(|e| { + anyhow::anyhow!( + "Whisper CPU fallback transcription failed: {}", + e + ) + })?; + *whisper_engine = cpu_worker; + let _ = self.app_handle.emit( + "whisper-compute-fallback", + WhisperComputeFallbackEvent { + from: "gpu".to_string(), + to: "cpu".to_string(), + reason: err.to_string(), + can_retry_gpu: true, + }, + ); + transcribe_rs::TranscriptionResult { + text: retried_text, + segments: None, + } + } else { + return Err(anyhow::anyhow!( + "Whisper transcription failed: {}", + err + )); + } + } + } } LoadedEngine::Parakeet(parakeet_engine) => { let params = ParakeetInferenceParams { @@ -486,3 +591,11 @@ impl Drop for TranscriptionManager { } } } + +fn whisper_runtime_mode_from_setting(mode: WhisperComputeMode) -> WhisperRuntimeMode { + match mode { + WhisperComputeMode::Auto => WhisperRuntimeMode::Gpu, + WhisperComputeMode::Gpu => WhisperRuntimeMode::Gpu, + WhisperComputeMode::Cpu => WhisperRuntimeMode::Cpu, + } +} diff --git a/src-tauri/src/settings.rs b/src-tauri/src/settings.rs index 44402bc16..8a4d17000 100644 --- a/src-tauri/src/settings.rs +++ b/src-tauri/src/settings.rs @@ -158,6 +158,14 
@@ pub enum KeyboardImplementation { HandyKeys, } +#[derive(Serialize, Deserialize, Debug, Clone, Copy, PartialEq, Eq, Type)] +#[serde(rename_all = "snake_case")] +pub enum WhisperComputeMode { + Auto, + Gpu, + Cpu, +} + impl Default for KeyboardImplementation { fn default() -> Self { // Default to HandyKeys only on macOS where it's well-tested. @@ -169,6 +177,14 @@ } } +impl Default for WhisperComputeMode { + fn default() -> Self { + // Both cfg branches returned Auto, so the per-platform split was dead code; + // Auto prefers GPU and falls back to CPU at runtime on every platform. + WhisperComputeMode::Auto + } +} + impl Default for ModelUnloadTimeout { fn default() -> Self { ModelUnloadTimeout::Never @@ -315,6 +331,8 @@ pub struct AppSettings { pub experimental_enabled: bool, #[serde(default)] pub keyboard_implementation: KeyboardImplementation, + #[serde(default)] + pub whisper_compute_mode: WhisperComputeMode, #[serde(default = "default_paste_delay_ms")] pub paste_delay_ms: u64, } @@ -631,6 +649,7 @@ pub fn get_default_settings() -> AppSettings { app_language: default_app_language(), experimental_enabled: false, keyboard_implementation: KeyboardImplementation::default(), + whisper_compute_mode: WhisperComputeMode::default(), paste_delay_ms: default_paste_delay_ms(), } } diff --git a/src-tauri/src/shortcut/mod.rs b/src-tauri/src/shortcut/mod.rs index 6ff5a04b7..fd2eba99b 100644 --- a/src-tauri/src/shortcut/mod.rs +++ b/src-tauri/src/shortcut/mod.rs @@ -21,8 +21,8 @@ use tauri_plugin_autostart::ManagerExt; use crate::settings::{ self, get_settings, ClipboardHandling, KeyboardImplementation, LLMPrompt, OverlayPosition, - PasteMethod, ShortcutBinding, SoundTheme, APPLE_INTELLIGENCE_DEFAULT_MODEL_ID, - APPLE_INTELLIGENCE_PROVIDER_ID, + PasteMethod, ShortcutBinding, SoundTheme, WhisperComputeMode, + APPLE_INTELLIGENCE_DEFAULT_MODEL_ID, APPLE_INTELLIGENCE_PROVIDER_ID, }; use crate::tray; @@ -730,6 +730,22 @@ pub fn change_experimental_enabled_setting(app: AppHandle,
enabled: bool) -> Res Ok(()) } +#[tauri::command] +#[specta::specta] +pub fn change_whisper_compute_mode_setting(app: AppHandle, mode: String) -> Result<(), String> { + let mut settings = settings::get_settings(&app); + settings.whisper_compute_mode = match mode.as_str() { + "auto" => WhisperComputeMode::Auto, + "gpu" => WhisperComputeMode::Gpu, + "cpu" => WhisperComputeMode::Cpu, + other => { + return Err(format!("Invalid whisper compute mode '{}'", other)); + } + }; + settings::write_settings(&app, settings); + Ok(()) +} + #[tauri::command] #[specta::specta] pub fn change_post_process_base_url_setting( diff --git a/src-tauri/src/whisper_worker.rs b/src-tauri/src/whisper_worker.rs new file mode 100644 index 000000000..88da5ca41 --- /dev/null +++ b/src-tauri/src/whisper_worker.rs @@ -0,0 +1,255 @@ +use anyhow::{anyhow, Result}; +use serde::{Deserialize, Serialize}; +use std::io::{BufRead, BufReader, BufWriter, Write}; +use std::path::PathBuf; +use std::process::{Child, ChildStdin, ChildStdout, Command, Stdio}; +use transcribe_rs::engines::whisper::{WhisperEngine, WhisperInferenceParams, WhisperModelParams}; +use transcribe_rs::TranscriptionEngine; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum WhisperRuntimeMode { + Gpu, + Cpu, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct WhisperWorkerInferenceParams { + pub language: Option, + pub translate: bool, +} + +#[derive(Debug, Serialize, Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +enum WhisperWorkerRequest { + LoadModel { + model_path: String, + use_gpu: bool, + }, + Transcribe { + audio: Vec, + params: WhisperWorkerInferenceParams, + }, + UnloadModel, +} + +#[derive(Debug, Serialize, Deserialize)] +struct WhisperWorkerResponse { + ok: bool, + text: Option, + error: Option, +} + +pub struct WhisperWorkerClient { + child: Child, + stdin: BufWriter, + stdout: BufReader, + runtime_mode: WhisperRuntimeMode, +} + +impl WhisperWorkerClient { + pub fn 
spawn_for_model(model_path: &PathBuf, runtime_mode: WhisperRuntimeMode) -> Result { + let current_exe = std::env::current_exe()?; + let mut child = Command::new(current_exe) + .arg("--whisper-worker") + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::null()) + .spawn()?; + + let child_stdin = child + .stdin + .take() + .ok_or_else(|| anyhow!("whisper worker stdin is unavailable"))?; + let child_stdout = child + .stdout + .take() + .ok_or_else(|| anyhow!("whisper worker stdout is unavailable"))?; + + let mut client = Self { + child, + stdin: BufWriter::new(child_stdin), + stdout: BufReader::new(child_stdout), + runtime_mode, + }; + + client.send_request(WhisperWorkerRequest::LoadModel { + model_path: model_path.to_string_lossy().to_string(), + use_gpu: runtime_mode == WhisperRuntimeMode::Gpu, + })?; + + Ok(client) + } + + pub fn runtime_mode(&self) -> WhisperRuntimeMode { + self.runtime_mode + } + + pub fn is_alive(&mut self) -> bool { + self.child.try_wait().ok().flatten().is_none() + } + + pub fn transcribe( + &mut self, + audio: Vec, + params: WhisperWorkerInferenceParams, + ) -> Result { + let response = self.send_request(WhisperWorkerRequest::Transcribe { audio, params })?; + response + .text + .ok_or_else(|| anyhow!("whisper worker returned empty result")) + } + + pub fn unload(&mut self) -> Result<()> { + let _ = self.send_request(WhisperWorkerRequest::UnloadModel)?; + Ok(()) + } + + pub fn terminate(&mut self) { + let _ = self.child.kill(); + let _ = self.child.wait(); + } + + fn send_request(&mut self, request: WhisperWorkerRequest) -> Result { + let payload = serde_json::to_string(&request)?; + self.stdin.write_all(payload.as_bytes())?; + self.stdin.write_all(b"\n")?; + self.stdin.flush()?; + + let mut response_line = String::new(); + let read = self.stdout.read_line(&mut response_line)?; + if read == 0 { + return Err(anyhow!("whisper worker process exited")); + } + + let response: WhisperWorkerResponse = 
serde_json::from_str(response_line.trim())?; + if response.ok { + Ok(response) + } else { + Err(anyhow!( + "{}", + response + .error + .unwrap_or_else(|| "whisper worker request failed".to_string()) + )) + } + } +} + +impl Drop for WhisperWorkerClient { + fn drop(&mut self) { + self.terminate(); + } +} + +pub fn run_worker_process() -> Result<()> { + let stdin = std::io::stdin(); + let stdout = std::io::stdout(); + let mut reader = BufReader::new(stdin.lock()); + let mut writer = BufWriter::new(stdout.lock()); + let mut engine: Option = None; + + loop { + let mut line = String::new(); + let bytes = reader.read_line(&mut line)?; + if bytes == 0 { + break; + } + + let request: WhisperWorkerRequest = match serde_json::from_str(line.trim()) { + Ok(v) => v, + Err(e) => { + write_response( + &mut writer, + WhisperWorkerResponse { + ok: false, + text: None, + error: Some(format!("invalid request: {}", e)), + }, + )?; + continue; + } + }; + + let response = match request { + WhisperWorkerRequest::LoadModel { + model_path, + use_gpu, + } => { + let mut whisper = WhisperEngine::new(); + match whisper.load_model_with_params( + PathBuf::from(model_path).as_path(), + WhisperModelParams { use_gpu }, + ) { + Ok(_) => { + engine = Some(whisper); + WhisperWorkerResponse { + ok: true, + text: None, + error: None, + } + } + Err(e) => WhisperWorkerResponse { + ok: false, + text: None, + error: Some(format!("failed to load whisper model: {}", e)), + }, + } + } + WhisperWorkerRequest::Transcribe { audio, params } => { + match engine.as_mut() { + Some(engine_ref) => { + let inference_params = WhisperInferenceParams { + language: params.language, + translate: params.translate, + ..Default::default() + }; + + match engine_ref.transcribe_samples(audio, Some(inference_params)) { + Ok(result) => WhisperWorkerResponse { + ok: true, + text: Some(result.text), + error: None, + }, + Err(e) => WhisperWorkerResponse { + ok: false, + text: None, + error: Some(format!("whisper transcription failed: 
{}", e)), + }, + } + } + None => WhisperWorkerResponse { + ok: false, + text: None, + error: Some("model is not loaded".to_string()), + }, + } + } + WhisperWorkerRequest::UnloadModel => { + if let Some(engine_ref) = engine.as_mut() { + engine_ref.unload_model(); + } + engine = None; + WhisperWorkerResponse { + ok: true, + text: None, + error: None, + } + } + }; + + write_response(&mut writer, response)?; + } + + Ok(()) +} + +fn write_response( + writer: &mut BufWriter>, + response: WhisperWorkerResponse, +) -> Result<()> { + let payload = serde_json::to_string(&response)?; + writer.write_all(payload.as_bytes())?; + writer.write_all(b"\n")?; + writer.flush()?; + Ok(()) +} diff --git a/src/App.tsx b/src/App.tsx index 454ccd2d8..2c0421611 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -1,6 +1,8 @@ import { useEffect, useState, useRef } from "react"; import { Toaster } from "sonner"; +import { toast } from "sonner"; import { useTranslation } from "react-i18next"; +import { listen } from "@tauri-apps/api/event"; import { platform } from "@tauri-apps/plugin-os"; import { checkAccessibilityPermission, @@ -25,7 +27,7 @@ const renderSettingsContent = (section: SidebarSection) => { }; function App() { - const { i18n } = useTranslation(); + const { t, i18n } = useTranslation(); const [onboardingStep, setOnboardingStep] = useState( null, ); @@ -48,6 +50,37 @@ function App() { checkOnboardingStatus(); }, []); + useEffect(() => { + let unlisten: (() => void) | null = null; + listen<{ + from: string; + to: string; + reason: string; + can_retry_gpu: boolean; + }>("whisper-compute-fallback", () => { + toast.warning(t("settings.advanced.whisperCompute.fallbackWarning"), { + action: { + label: t("settings.advanced.whisperCompute.returnGpu"), + onClick: async () => { + const result = await commands.retryWhisperGpu(); + if (result.status === "ok") { + await updateSetting("whisper_compute_mode", "gpu"); + } else { + toast.error(t("settings.advanced.whisperCompute.returnGpuFailed")); + } 
+ }, + }, + }); + }).then((fn) => { + unlisten = fn; + }); + return () => { + if (unlisten) { + unlisten(); + } + }; + }, [t, updateSetting]); + // Initialize RTL direction when language changes useEffect(() => { initializeRTL(i18n.language); diff --git a/src/bindings.ts b/src/bindings.ts index ff3846d32..3db0f8f91 100644 --- a/src/bindings.ts +++ b/src/bindings.ts @@ -141,6 +141,14 @@ async changeExperimentalEnabledSetting(enabled: boolean) : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("change_whisper_compute_mode_setting", { mode }) }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} +}, async changePostProcessBaseUrlSetting(providerId: string, baseUrl: string) : Promise> { try { return { status: "ok", data: await TAURI_INVOKE("change_post_process_base_url_setting", { providerId, baseUrl }) }; @@ -621,6 +629,14 @@ async unloadModelManually() : Promise> { else return { status: "error", error: e as any }; } }, +async retryWhisperGpu() : Promise> { + try { + return { status: "ok", data: await TAURI_INVOKE("retry_whisper_gpu") }; +} catch (e) { + if(e instanceof Error) throw e; + else return { status: "error", error: e as any }; +} +}, async getHistoryEntries() : Promise> { try { return { status: "ok", data: await TAURI_INVOKE("get_history_entries") }; @@ -670,10 +686,8 @@ async updateRecordingRetentionPeriod(period: string) : Promise> { try { @@ -695,7 +709,7 @@ async isLaptop() : Promise> { /** user-defined types **/ -export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: 
boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; keyboard_implementation?: KeyboardImplementation; paste_delay_ms?: number } +export type AppSettings = { bindings: Partial<{ [key in string]: ShortcutBinding }>; push_to_talk: boolean; audio_feedback: boolean; audio_feedback_volume?: number; sound_theme?: SoundTheme; start_hidden?: boolean; autostart_enabled?: boolean; update_checks_enabled?: boolean; selected_model?: string; always_on_microphone?: boolean; selected_microphone?: string | null; clamshell_microphone?: string | null; selected_output_device?: string | null; translate_to_english?: boolean; selected_language?: string; overlay_position?: OverlayPosition; debug_mode?: boolean; log_level?: LogLevel; custom_words?: string[]; model_unload_timeout?: ModelUnloadTimeout; word_correction_threshold?: number; history_limit?: number; recording_retention_period?: RecordingRetentionPeriod; paste_method?: PasteMethod; clipboard_handling?: ClipboardHandling; post_process_enabled?: boolean; post_process_provider_id?: string; post_process_providers?: PostProcessProvider[]; post_process_api_keys?: Partial<{ [key in string]: string }>; post_process_models?: Partial<{ [key in string]: string }>; post_process_prompts?: LLMPrompt[]; 
post_process_selected_prompt_id?: string | null; mute_while_recording?: boolean; append_trailing_space?: boolean; app_language?: string; experimental_enabled?: boolean; keyboard_implementation?: KeyboardImplementation; whisper_compute_mode?: WhisperComputeMode; paste_delay_ms?: number } export type AudioDevice = { index: string; name: string; is_default: boolean } export type BindingResponse = { success: boolean; binding: ShortcutBinding | null; error: string | null } export type ClipboardHandling = "dont_modify" | "copy_to_clipboard" @@ -722,6 +736,7 @@ export type PostProcessProvider = { id: string; label: string; base_url: string; export type RecordingRetentionPeriod = "never" | "preserve_limit" | "days_3" | "weeks_2" | "months_3" export type ShortcutBinding = { id: string; name: string; description: string; default_binding: string; current_binding: string } export type SoundTheme = "marimba" | "pop" | "custom" +export type WhisperComputeMode = "auto" | "gpu" | "cpu" /** tauri-specta globals **/ diff --git a/src/components/settings/WhisperComputeMode.tsx b/src/components/settings/WhisperComputeMode.tsx new file mode 100644 index 000000000..30b0a294d --- /dev/null +++ b/src/components/settings/WhisperComputeMode.tsx @@ -0,0 +1,54 @@ +import React from "react"; +import { useTranslation } from "react-i18next"; +import { Dropdown } from "../ui/Dropdown"; +import { SettingContainer } from "../ui/SettingContainer"; +import { useSettings } from "../../hooks/useSettings"; +import type { WhisperComputeMode } from "@/bindings"; + +interface WhisperComputeModeSettingProps { + descriptionMode?: "tooltip" | "inline"; + grouped?: boolean; +} + +export const WhisperComputeModeSetting: React.FC< + WhisperComputeModeSettingProps +> = ({ descriptionMode = "inline", grouped = false }) => { + const { t } = useTranslation(); + const { getSetting, updateSetting } = useSettings(); + + const currentValue = (getSetting("whisper_compute_mode") ?? 
+ "auto") as WhisperComputeMode; + + const options = [ + { + value: "auto" as WhisperComputeMode, + label: t("settings.advanced.whisperCompute.options.auto"), + }, + { + value: "gpu" as WhisperComputeMode, + label: t("settings.advanced.whisperCompute.options.gpu"), + }, + { + value: "cpu" as WhisperComputeMode, + label: t("settings.advanced.whisperCompute.options.cpu"), + }, + ]; + + return ( + + + updateSetting("whisper_compute_mode", value as WhisperComputeMode) + } + disabled={false} + /> + + ); +}; diff --git a/src/components/settings/advanced/AdvancedSettings.tsx b/src/components/settings/advanced/AdvancedSettings.tsx index fa220ea67..abd9d4387 100644 --- a/src/components/settings/advanced/AdvancedSettings.tsx +++ b/src/components/settings/advanced/AdvancedSettings.tsx @@ -13,6 +13,7 @@ import { AppendTrailingSpace } from "../AppendTrailingSpace"; import { HistoryLimit } from "../HistoryLimit"; import { RecordingRetentionPeriodSelector } from "../RecordingRetentionPeriod"; import { ExperimentalToggle } from "../ExperimentalToggle"; +import { WhisperComputeModeSetting } from "../WhisperComputeMode"; import { useSettings } from "../../../hooks/useSettings"; import { KeyboardImplementationSelector } from "../debug/KeyboardImplementationSelector"; @@ -37,6 +38,7 @@ export const AdvancedSettings: React.FC = () => { + diff --git a/src/i18n/locales/ar/translation.json b/src/i18n/locales/ar/translation.json index 92ab7ccab..0486d982b 100644 --- a/src/i18n/locales/ar/translation.json +++ b/src/i18n/locales/ar/translation.json @@ -248,6 +248,18 @@ "placeholder": "أضف كلمة", "add": "إضافة", "remove": "إزالة {{word}}" + }, + "whisperCompute": { + "title": "Whisper Compute Mode", + "description": "Choose how Whisper runs. Auto prefers GPU and falls back to CPU if GPU fails.", + "options": { + "auto": "Auto (GPU to CPU fallback)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "Whisper GPU failed. 
Temporarily switched to CPU.", + "returnGpu": "Return to GPU", + "returnGpuFailed": "Failed to switch Whisper back to GPU." } }, "postProcessing": { diff --git a/src/i18n/locales/cs/translation.json b/src/i18n/locales/cs/translation.json index 239974fa3..8ade6cd33 100644 --- a/src/i18n/locales/cs/translation.json +++ b/src/i18n/locales/cs/translation.json @@ -270,6 +270,18 @@ "placeholder": "Přidat slovo", "add": "Přidat", "remove": "Odebrat {{word}}" + }, + "whisperCompute": { + "title": "Whisper Compute Mode", + "description": "Choose how Whisper runs. Auto prefers GPU and falls back to CPU if GPU fails.", + "options": { + "auto": "Auto (GPU to CPU fallback)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "Whisper GPU failed. Temporarily switched to CPU.", + "returnGpu": "Return to GPU", + "returnGpuFailed": "Failed to switch Whisper back to GPU." } }, "postProcessing": { diff --git a/src/i18n/locales/de/translation.json b/src/i18n/locales/de/translation.json index b51d98287..590d9bb2d 100644 --- a/src/i18n/locales/de/translation.json +++ b/src/i18n/locales/de/translation.json @@ -270,6 +270,18 @@ "placeholder": "Wort hinzufügen", "add": "Hinzufügen", "remove": "{{word}} entfernen" + }, + "whisperCompute": { + "title": "Whisper Compute Mode", + "description": "Choose how Whisper runs. Auto prefers GPU and falls back to CPU if GPU fails.", + "options": { + "auto": "Auto (GPU to CPU fallback)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "Whisper GPU failed. Temporarily switched to CPU.", + "returnGpu": "Return to GPU", + "returnGpuFailed": "Failed to switch Whisper back to GPU." 
} }, "postProcessing": { diff --git a/src/i18n/locales/en/translation.json b/src/i18n/locales/en/translation.json index 1a1a10c51..d32d11cb5 100644 --- a/src/i18n/locales/en/translation.json +++ b/src/i18n/locales/en/translation.json @@ -270,6 +270,18 @@ "placeholder": "Add a word", "add": "Add", "remove": "Remove {{word}}" + }, + "whisperCompute": { + "title": "Whisper Compute Mode", + "description": "Choose how Whisper runs. Auto prefers GPU and falls back to CPU if GPU fails.", + "options": { + "auto": "Auto (GPU to CPU fallback)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "Whisper GPU failed. Temporarily switched to CPU.", + "returnGpu": "Return to GPU", + "returnGpuFailed": "Failed to switch Whisper back to GPU." } }, "postProcessing": { diff --git a/src/i18n/locales/es/translation.json b/src/i18n/locales/es/translation.json index 353fe6373..69afd1dc1 100644 --- a/src/i18n/locales/es/translation.json +++ b/src/i18n/locales/es/translation.json @@ -270,6 +270,18 @@ "placeholder": "Agregar una palabra", "add": "Agregar", "remove": "Eliminar {{word}}" + }, + "whisperCompute": { + "title": "Whisper Compute Mode", + "description": "Choose how Whisper runs. Auto prefers GPU and falls back to CPU if GPU fails.", + "options": { + "auto": "Auto (GPU to CPU fallback)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "Whisper GPU failed. Temporarily switched to CPU.", + "returnGpu": "Return to GPU", + "returnGpuFailed": "Failed to switch Whisper back to GPU." } }, "postProcessing": { diff --git a/src/i18n/locales/fr/translation.json b/src/i18n/locales/fr/translation.json index c21a05beb..69e5de70e 100644 --- a/src/i18n/locales/fr/translation.json +++ b/src/i18n/locales/fr/translation.json @@ -270,6 +270,18 @@ "placeholder": "Ajouter un mot", "add": "Ajouter", "remove": "Supprimer {{word}}" + }, + "whisperCompute": { + "title": "Whisper Compute Mode", + "description": "Choose how Whisper runs. 
Auto prefers GPU and falls back to CPU if GPU fails.", + "options": { + "auto": "Auto (GPU to CPU fallback)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "Whisper GPU failed. Temporarily switched to CPU.", + "returnGpu": "Return to GPU", + "returnGpuFailed": "Failed to switch Whisper back to GPU." } }, "postProcessing": { diff --git a/src/i18n/locales/it/translation.json b/src/i18n/locales/it/translation.json index c7885a506..6f7d1a206 100644 --- a/src/i18n/locales/it/translation.json +++ b/src/i18n/locales/it/translation.json @@ -270,6 +270,18 @@ "placeholder": "Aggiungi una parola", "add": "Aggiungi", "remove": "Rimuovi {{word}}" + }, + "whisperCompute": { + "title": "Whisper Compute Mode", + "description": "Choose how Whisper runs. Auto prefers GPU and falls back to CPU if GPU fails.", + "options": { + "auto": "Auto (GPU to CPU fallback)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "Whisper GPU failed. Temporarily switched to CPU.", + "returnGpu": "Return to GPU", + "returnGpuFailed": "Failed to switch Whisper back to GPU." } }, "postProcessing": { diff --git a/src/i18n/locales/ja/translation.json b/src/i18n/locales/ja/translation.json index 592115901..77ec9bb42 100644 --- a/src/i18n/locales/ja/translation.json +++ b/src/i18n/locales/ja/translation.json @@ -270,6 +270,18 @@ "placeholder": "単語を追加", "add": "追加", "remove": "{{word}}を削除" + }, + "whisperCompute": { + "title": "Whisper Compute Mode", + "description": "Choose how Whisper runs. Auto prefers GPU and falls back to CPU if GPU fails.", + "options": { + "auto": "Auto (GPU to CPU fallback)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "Whisper GPU failed. Temporarily switched to CPU.", + "returnGpu": "Return to GPU", + "returnGpuFailed": "Failed to switch Whisper back to GPU." 
} }, "postProcessing": { diff --git a/src/i18n/locales/ko/translation.json b/src/i18n/locales/ko/translation.json index 73397c834..9e7e51ad0 100644 --- a/src/i18n/locales/ko/translation.json +++ b/src/i18n/locales/ko/translation.json @@ -270,6 +270,18 @@ "placeholder": "단어 추가", "add": "추가", "remove": "{{word}} 제거" + }, + "whisperCompute": { + "title": "Whisper 연산 모드", + "description": "Whisper 실행 방식을 선택하세요. 자동은 GPU를 우선하고 GPU가 실패하면 CPU로 전환합니다.", + "options": { + "auto": "자동 (GPU에서 CPU로 대체)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "Whisper GPU가 실패했습니다. 일시적으로 CPU로 전환했습니다.", + "returnGpu": "GPU로 돌아가기", + "returnGpuFailed": "Whisper를 GPU로 되돌리지 못했습니다." } }, "postProcessing": { diff --git a/src/i18n/locales/pl/translation.json b/src/i18n/locales/pl/translation.json index 9c5405209..f5dfcef78 100644 --- a/src/i18n/locales/pl/translation.json +++ b/src/i18n/locales/pl/translation.json @@ -270,6 +270,18 @@ "placeholder": "Dodaj słowo", "add": "Dodaj", "remove": "Usuń {{word}}" + }, + "whisperCompute": { + "title": "Tryb obliczeń Whisper", + "description": "Wybierz sposób działania Whisper. Auto preferuje GPU i przełącza się na CPU, gdy GPU zawiedzie.", + "options": { + "auto": "Auto (przełączanie z GPU na CPU)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "GPU Whisper zawiodło. Tymczasowo przełączono na CPU.", + "returnGpu": "Wróć do GPU", + "returnGpuFailed": "Nie udało się przełączyć Whisper z powrotem na GPU." } }, "postProcessing": { diff --git a/src/i18n/locales/pt/translation.json b/src/i18n/locales/pt/translation.json index 950f6ab93..6c91af781 100644 --- a/src/i18n/locales/pt/translation.json +++ b/src/i18n/locales/pt/translation.json @@ -270,6 +270,18 @@ "placeholder": "Adicionar uma palavra", "add": "Adicionar", "remove": "Remover {{word}}" + }, + "whisperCompute": { + "title": "Modo de computação do Whisper", + "description": "Escolha como o Whisper é executado. 
Auto prefere a GPU e recorre à CPU se a GPU falhar.", + "options": { + "auto": "Auto (alternância de GPU para CPU)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "A GPU do Whisper falhou. Alternado temporariamente para a CPU.", + "returnGpu": "Voltar para a GPU", + "returnGpuFailed": "Falha ao retornar o Whisper para a GPU." } }, "postProcessing": { diff --git a/src/i18n/locales/ru/translation.json b/src/i18n/locales/ru/translation.json index 9132e9bd9..85d786a88 100644 --- a/src/i18n/locales/ru/translation.json +++ b/src/i18n/locales/ru/translation.json @@ -270,6 +270,18 @@ "placeholder": "Добавить слово", "add": "Добавлять", "remove": "Удалить {{word}}" + }, + "whisperCompute": { + "title": "Режим вычислений Whisper", + "description": "Выберите, как работает Whisper. Авто предпочитает GPU и переключается на CPU при сбое GPU.", + "options": { + "auto": "Авто (переход с GPU на CPU)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "GPU Whisper дал сбой. Временно переключено на CPU.", + "returnGpu": "Вернуться к GPU", + "returnGpuFailed": "Не удалось вернуть Whisper на GPU." } }, "postProcessing": { diff --git a/src/i18n/locales/tr/translation.json b/src/i18n/locales/tr/translation.json index d0d587cfc..f7e9f9c7c 100644 --- a/src/i18n/locales/tr/translation.json +++ b/src/i18n/locales/tr/translation.json @@ -270,6 +270,18 @@ "placeholder": "Kelime ekle", "add": "Ekle", "remove": "{{word}} Kaldır" + }, + "whisperCompute": { + "title": "Whisper Hesaplama Modu", + "description": "Whisper'ın nasıl çalışacağını seçin. Otomatik, GPU'yu tercih eder ve GPU başarısız olursa CPU'ya geçer.", + "options": { + "auto": "Otomatik (GPU'dan CPU'ya geçiş)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "Whisper GPU başarısız oldu. Geçici olarak CPU'ya geçildi.", + "returnGpu": "GPU'ya Dön", + "returnGpuFailed": "Whisper GPU'ya geri döndürülemedi." 
} }, "postProcessing": { diff --git a/src/i18n/locales/uk/translation.json b/src/i18n/locales/uk/translation.json index 0af4137a5..5fc132844 100644 --- a/src/i18n/locales/uk/translation.json +++ b/src/i18n/locales/uk/translation.json @@ -270,6 +270,18 @@ "placeholder": "Додати слово", "add": "Додати", "remove": "Видалити {{word}}" + }, + "whisperCompute": { + "title": "Режим обчислень Whisper", + "description": "Виберіть, як працює Whisper. Авто надає перевагу GPU і переходить на CPU в разі збою GPU.", + "options": { + "auto": "Авто (перехід з GPU на CPU)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "GPU Whisper дав збій. Тимчасово переключено на CPU.", + "returnGpu": "Повернутися до GPU", + "returnGpuFailed": "Не вдалося повернути Whisper на GPU." } }, "postProcessing": { diff --git a/src/i18n/locales/vi/translation.json b/src/i18n/locales/vi/translation.json index 81f077755..19cc29d8c 100644 --- a/src/i18n/locales/vi/translation.json +++ b/src/i18n/locales/vi/translation.json @@ -270,6 +270,18 @@ "placeholder": "Thêm một từ", "add": "Thêm", "remove": "Xóa {{word}}" + }, + "whisperCompute": { + "title": "Chế độ tính toán Whisper", + "description": "Chọn cách Whisper chạy. Tự động ưu tiên GPU và chuyển sang CPU nếu GPU gặp lỗi.", + "options": { + "auto": "Tự động (chuyển từ GPU sang CPU)", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "GPU của Whisper gặp lỗi. Đã tạm chuyển sang CPU.", + "returnGpu": "Quay lại GPU", + "returnGpuFailed": "Không thể chuyển Whisper trở lại GPU." } }, "postProcessing": { diff --git a/src/i18n/locales/zh/translation.json b/src/i18n/locales/zh/translation.json index 5c16a4ad9..a4e130aa9 100644 --- a/src/i18n/locales/zh/translation.json +++ b/src/i18n/locales/zh/translation.json @@ -270,6 +270,18 @@ "placeholder": "添加词汇", "add": "添加", "remove": "删除 {{word}}" + }, + "whisperCompute": { + "title": "Whisper 计算模式", + "description": "选择 Whisper 的运行方式。
自动模式优先使用 GPU，GPU 失败时回退到 CPU。", + "options": { + "auto": "自动（GPU 回退到 CPU）", + "gpu": "GPU", + "cpu": "CPU" + }, + "fallbackWarning": "Whisper GPU 失败，已临时切换到 CPU。", + "returnGpu": "返回 GPU", + "returnGpuFailed": "无法将 Whisper 切换回 GPU。" } }, "postProcessing": { diff --git a/src/stores/settingsStore.ts b/src/stores/settingsStore.ts index 620ab7053..0c66d9cad 100644 --- a/src/stores/settingsStore.ts +++ b/src/stores/settingsStore.ts @@ -127,6 +127,8 @@ app_language: (value) => commands.changeAppLanguageSetting(value as string), experimental_enabled: (value) => commands.changeExperimentalEnabledSetting(value as boolean), + whisper_compute_mode: (value) => + commands.changeWhisperComputeModeSetting(value as string), }; export const useSettingsStore = create()(