diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 00000000..b58b603f
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,5 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
diff --git a/.idea/clicky.iml b/.idea/clicky.iml
new file mode 100644
index 00000000..24643cc3
--- /dev/null
+++ b/.idea/clicky.iml
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 00000000..7d1a270d
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 00000000..35eb1ddf
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/AGENTS.md b/AGENTS.md
index 6946d441..cb2b7e60 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -19,6 +19,7 @@ All API keys live on a Cloudflare Worker proxy — nothing sensitive ships in th
- **Text-to-Speech**: ElevenLabs (`eleven_flash_v2_5` model) via Cloudflare Worker proxy
- **Screen Capture**: ScreenCaptureKit (macOS 14.2+), multi-monitor support
- **Voice Input**: Push-to-talk via `AVAudioEngine` + pluggable transcription-provider layer. System-wide keyboard shortcut via listen-only CGEvent tap.
+- **Text Input**: Customizable global hotkey (`⌥⌘K` by default) opens a compact typed-command popup near the cursor. Typed commands reuse the same screenshot → Claude → TTS/pointing pipeline as voice.
- **Element Pointing**: Claude embeds `[POINT:x,y:label:screenN]` tags in responses. The overlay parses these, maps coordinates to the correct monitor, and animates the blue cursor along a bezier arc to the target.
- **Concurrency**: `@MainActor` isolation, async/await throughout
- **Analytics**: PostHog via `ClickyAnalytics.swift`
@@ -44,6 +45,8 @@ Worker vars: `ELEVENLABS_VOICE_ID`
**Global Push-To-Talk Shortcut**: Background push-to-talk uses a listen-only `CGEvent` tap instead of an AppKit global monitor so modifier-based shortcuts like `ctrl + option` are detected more reliably while the app is running in the background.
+**Global Text Input Shortcut**: Background typed input uses a sibling listen-only `CGEvent` tap with a persisted shortcut model. The popup is a lightweight `NSPanel` near the cursor, and submitted text feeds the same core response pipeline as voice transcripts.
+
**Shared URLSession for AssemblyAI**: A single long-lived `URLSession` is shared across all AssemblyAI streaming sessions (owned by the provider, not the session). Creating and invalidating a URLSession per session corrupts the OS connection pool and causes "Socket is not connected" errors after a few rapid reconnections.
**Transient Cursor Mode**: When "Show Clicky" is off, pressing the hotkey fades in the cursor overlay for the duration of the interaction (recording → response → TTS → optional pointing), then fades it out automatically after 1 second of inactivity.
@@ -53,9 +56,9 @@ Worker vars: `ELEVENLABS_VOICE_ID`
| File | Lines | Purpose |
|------|-------|---------|
| `leanring_buddyApp.swift` | ~89 | Menu bar app entry point. Uses `@NSApplicationDelegateAdaptor` with `CompanionAppDelegate` which creates `MenuBarPanelManager` and starts `CompanionManager`. No main window — the app lives entirely in the status bar. |
-| `CompanionManager.swift` | ~1026 | Central state machine. Owns dictation, shortcut monitoring, screen capture, Claude API, ElevenLabs TTS, and overlay management. Tracks voice state (idle/listening/processing/responding), conversation history, model selection, and cursor visibility. Coordinates the full push-to-talk → screenshot → Claude → TTS → pointing pipeline. |
+| `CompanionManager.swift` | ~1111 | Central state machine. Owns dictation, voice/text shortcut monitoring, text popup orchestration, screen capture, Claude API, ElevenLabs TTS, and overlay management. Tracks voice state (idle/listening/processing/responding), conversation history, model selection, and cursor visibility. Coordinates the full voice or typed command → screenshot → Claude → TTS → pointing pipeline. |
| `MenuBarPanelManager.swift` | ~243 | NSStatusItem + custom NSPanel lifecycle. Creates the menu bar icon, manages the floating companion panel (show/hide/position), installs click-outside-to-dismiss monitor. |
-| `CompanionPanelView.swift` | ~761 | SwiftUI panel content for the menu bar dropdown. Shows companion status, push-to-talk instructions, model picker (Sonnet/Opus), permissions UI, DM feedback button, and quit button. Dark aesthetic using `DS` design system. |
+| `CompanionPanelView.swift` | ~876 | SwiftUI panel content for the menu bar dropdown. Shows companion status, push-to-talk instructions, model picker (Sonnet/Opus), text hotkey settings, permissions UI, DM feedback button, and quit button. Dark aesthetic using `DS` design system. |
| `OverlayWindow.swift` | ~881 | Full-screen transparent overlay hosting the blue cursor, response text, waveform, and spinner. Handles cursor animation, element pointing with bezier arcs, multi-monitor coordinate mapping, and fade-out transitions. |
| `CompanionResponseOverlay.swift` | ~217 | SwiftUI view for the response text bubble and waveform displayed next to the cursor in the overlay. |
| `CompanionScreenCaptureUtility.swift` | ~132 | Multi-monitor screenshot capture using ScreenCaptureKit. Returns labeled image data for each connected display. |
@@ -66,6 +69,9 @@ Worker vars: `ELEVENLABS_VOICE_ID`
| `AppleSpeechTranscriptionProvider.swift` | ~147 | Local fallback transcription provider backed by Apple's Speech framework. |
| `BuddyAudioConversionSupport.swift` | ~108 | Audio conversion helpers. Converts live mic buffers to PCM16 mono audio and builds WAV payloads for upload-based providers. |
| `GlobalPushToTalkShortcutMonitor.swift` | ~132 | System-wide push-to-talk monitor. Owns the listen-only `CGEvent` tap and publishes press/release transitions. |
+| `ClickyKeyboardShortcut.swift` | ~178 | Persisted keyboard shortcut model for typed command hotkeys, including display text and validation. |
+| `GlobalTextInputShortcutMonitor.swift` | ~134 | System-wide typed command shortcut monitor. Owns a listen-only `CGEvent` tap and publishes trigger events for the popup. |
+| `TextCommandPopupManager.swift` | ~342 | Compact typed-command `NSPanel` manager and SwiftUI popup view. Stays positioned near the cursor, focuses the input field, submits on Enter, and closes on Esc or an outside click. |
| `ClaudeAPI.swift` | ~291 | Claude vision API client with streaming (SSE) and non-streaming modes. TLS warmup optimization, image MIME detection, conversation history support. |
| `OpenAIAPI.swift` | ~142 | OpenAI GPT vision API client. |
| `ElevenLabsTTSClient.swift` | ~81 | ElevenLabs TTS client. Sends text to the Worker proxy, plays back audio via `AVAudioPlayer`. Exposes `isPlaying` for transient cursor scheduling. |
diff --git a/leanring-buddy.xcodeproj/project.pbxproj b/leanring-buddy.xcodeproj/project.pbxproj
index 75e57261..3139a9d6 100644
--- a/leanring-buddy.xcodeproj/project.pbxproj
+++ b/leanring-buddy.xcodeproj/project.pbxproj
@@ -34,9 +34,22 @@
28F22CD62F56440300A0FC59 /* leanring-buddyUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "leanring-buddyUITests.xctest"; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */
+/* Begin PBXFileSystemSynchronizedBuildFileExceptionSet section */
+ AA00BB072F6500070039DA55 /* Exceptions for "leanring-buddy" folder in "leanring-buddy" target */ = {
+ isa = PBXFileSystemSynchronizedBuildFileExceptionSet;
+ membershipExceptions = (
+ Info.plist,
+ );
+ target = 28F22CBE2F56440300A0FC59 /* leanring-buddy */;
+ };
+/* End PBXFileSystemSynchronizedBuildFileExceptionSet section */
+
/* Begin PBXFileSystemSynchronizedRootGroup section */
28F22CC12F56440300A0FC59 /* leanring-buddy */ = {
isa = PBXFileSystemSynchronizedRootGroup;
+ exceptions = (
+ AA00BB072F6500070039DA55 /* Exceptions for "leanring-buddy" folder in "leanring-buddy" target */,
+ );
path = "leanring-buddy";
sourceTree = "";
};
@@ -411,7 +424,7 @@
CODE_SIGN_STYLE = Automatic;
COMBINE_HIDPI_IMAGES = YES;
CURRENT_PROJECT_VERSION = 1;
- DEVELOPMENT_TEAM = 2UDAY4J48G;
+ DEVELOPMENT_TEAM = NDJZK3L926;
ENABLE_APP_SANDBOX = NO;
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_OUTGOING_NETWORK_CONNECTIONS = YES;
@@ -449,7 +462,7 @@
CODE_SIGN_STYLE = Automatic;
COMBINE_HIDPI_IMAGES = YES;
CURRENT_PROJECT_VERSION = 1;
- DEVELOPMENT_TEAM = 2UDAY4J48G;
+ DEVELOPMENT_TEAM = NDJZK3L926;
ENABLE_APP_SANDBOX = NO;
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_OUTGOING_NETWORK_CONNECTIONS = YES;
diff --git a/leanring-buddy.xcodeproj/xcuserdata/abdul-aleem.xcuserdatad/xcschemes/xcschememanagement.plist b/leanring-buddy.xcodeproj/xcuserdata/abdul-aleem.xcuserdatad/xcschemes/xcschememanagement.plist
new file mode 100644
index 00000000..6c13490a
--- /dev/null
+++ b/leanring-buddy.xcodeproj/xcuserdata/abdul-aleem.xcuserdatad/xcschemes/xcschememanagement.plist
@@ -0,0 +1,14 @@
+
+
+
+
+ SchemeUserState
+
+ leanring-buddy.xcscheme_^#shared#^_
+
+ orderHint
+ 0
+
+
+
+
diff --git a/leanring-buddy/ClickyKeyboardShortcut.swift b/leanring-buddy/ClickyKeyboardShortcut.swift
new file mode 100644
index 00000000..39206405
--- /dev/null
+++ b/leanring-buddy/ClickyKeyboardShortcut.swift
@@ -0,0 +1,207 @@
+//
+// ClickyKeyboardShortcut.swift
+// leanring-buddy
+//
+// Persisted keyboard shortcut model for Clicky's typed command popup.
+//
+
+import AppKit
+import Foundation
+
+/// A key-plus-modifiers combination that opens Clicky's typed command popup.
+///
+/// The value is `Codable` so it can be stored in `UserDefaults` as JSON. Modifier
+/// flags are reduced to the supported set before they are stored or compared.
+struct ClickyKeyboardShortcut: Codable, Equatable {
+    /// `UserDefaults` key under which the typed command shortcut is stored.
+    static let textInputUserDefaultsKey = "textInputKeyboardShortcut"
+
+    /// Shortcut used when nothing valid has been persisted (displayed as ⌥⌘K).
+    static let defaultTextInputShortcut = ClickyKeyboardShortcut(
+        keyCode: 40,
+        modifierFlags: [.option, .command],
+        keyDisplayText: "K"
+    )
+
+    /// The only modifier bits that are stored, displayed, and compared.
+    private static let supportedModifierFlags: NSEvent.ModifierFlags = [
+        .control,
+        .option,
+        .shift,
+        .command,
+        .function
+    ]
+
+    /// Virtual key code of the non-modifier key.
+    let keyCode: UInt16
+    /// Raw value of the supported modifier flags captured for this shortcut.
+    let modifierFlagsRawValue: UInt
+    /// Human-readable label for the key, such as "K" or "Return".
+    let keyDisplayText: String
+
+    /// The stored modifiers, filtered again so decoded values cannot carry unsupported bits.
+    var modifierFlags: NSEvent.ModifierFlags {
+        Self.supportedModifiers(in: NSEvent.ModifierFlags(rawValue: modifierFlagsRawValue))
+    }
+
+    /// Modifier symbols followed by the key label, for example "⌥⌘K".
+    var displayText: String {
+        Self.displayText(for: modifierFlags) + keyDisplayText
+    }
+
+    /// A user-facing reason this shortcut cannot be used, or `nil` when it is valid.
+    var validationErrorMessage: String? {
+        if keyDisplayText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
+            return "Choose a key with at least one modifier."
+        }
+
+        if modifierFlags.isEmpty {
+            return "Add at least one modifier."
+        }
+
+        // Any combination that contains both Control and Option is rejected.
+        if modifierFlags.isSuperset(of: [.control, .option]) {
+            return "Control+Option is reserved for voice."
+        }
+
+        return nil
+    }
+
+    /// Creates a shortcut, discarding modifier bits outside the supported set.
+    ///
+    /// - Parameters:
+    ///   - keyCode: Virtual key code of the non-modifier key.
+    ///   - modifierFlags: Modifiers held with the key; unsupported bits are dropped.
+    ///   - keyDisplayText: Label shown for the key in the UI.
+    init(keyCode: UInt16, modifierFlags: NSEvent.ModifierFlags, keyDisplayText: String) {
+        self.keyCode = keyCode
+        self.modifierFlagsRawValue = Self.supportedModifiers(in: modifierFlags).rawValue
+        self.keyDisplayText = keyDisplayText
+    }
+
+    /// Loads the persisted typed command shortcut.
+    ///
+    /// - Returns: The stored shortcut, or `defaultTextInputShortcut` when nothing is
+    ///   stored, the data cannot be decoded, or the decoded shortcut fails validation.
+    static func persistedTextInputShortcut() -> ClickyKeyboardShortcut {
+        guard let data = UserDefaults.standard.data(forKey: textInputUserDefaultsKey),
+              let shortcut = try? JSONDecoder().decode(ClickyKeyboardShortcut.self, from: data),
+              shortcut.validationErrorMessage == nil else {
+            return defaultTextInputShortcut
+        }
+
+        return shortcut
+    }
+
+    /// Stores this shortcut in `UserDefaults`. Best effort: an encoding failure is ignored.
+    func persistAsTextInputShortcut() {
+        guard let data = try? JSONEncoder().encode(self) else { return }
+        UserDefaults.standard.set(data, forKey: Self.textInputUserDefaultsKey)
+    }
+
+    /// Reports whether a key event is exactly this shortcut.
+    ///
+    /// The key code must be equal and the event's supported modifiers must equal the
+    /// stored modifiers. Extra supported modifiers do not match, so a ⌥K shortcut does
+    /// not fire while Control+Option (reserved for voice) is also held. Bits outside
+    /// the supported set, such as Caps Lock, are ignored.
+    ///
+    /// - Parameters:
+    ///   - eventKeyCode: Virtual key code reported by the event.
+    ///   - eventModifierFlags: Modifier flags reported by the event.
+    /// - Returns: `true` when the key and the supported modifiers are both identical.
+    func matches(keyCode eventKeyCode: UInt16, modifierFlags eventModifierFlags: NSEvent.ModifierFlags) -> Bool {
+        eventKeyCode == keyCode
+            && Self.supportedModifiers(in: eventModifierFlags) == modifierFlags
+    }
+
+    /// Builds a shortcut from a key-down event.
+    ///
+    /// - Returns: `nil` when the event is not a key-down or no key label can be derived.
+    static func shortcut(from event: NSEvent) -> ClickyKeyboardShortcut? {
+        guard event.type == .keyDown else { return nil }
+
+        let eventKeyLabel = keyDisplayText(for: event)
+        guard !eventKeyLabel.isEmpty else { return nil }
+
+        return ClickyKeyboardShortcut(
+            keyCode: event.keyCode,
+            modifierFlags: event.modifierFlags,
+            keyDisplayText: eventKeyLabel
+        )
+    }
+
+    /// Builds a shortcut from a raw key code and raw modifier flags.
+    ///
+    /// - Returns: `nil` when the key code has no entry in the named-key table
+    ///   (Return, Tab, Space, Delete, Esc, Enter, and the arrow keys).
+    static func shortcut(
+        keyCode: UInt16,
+        modifierFlagsRawValue: UInt64
+    ) -> ClickyKeyboardShortcut? {
+        guard let namedKeyLabel = keyDisplayText(forKeyCode: keyCode),
+              !namedKeyLabel.isEmpty else {
+            return nil
+        }
+
+        return ClickyKeyboardShortcut(
+            keyCode: keyCode,
+            modifierFlags: NSEvent.ModifierFlags(rawValue: UInt(modifierFlagsRawValue)),
+            keyDisplayText: namedKeyLabel
+        )
+    }
+
+    /// Reduces event flags to the device-independent, supported modifier bits.
+    private static func supportedModifiers(in modifierFlags: NSEvent.ModifierFlags) -> NSEvent.ModifierFlags {
+        modifierFlags
+            .intersection(.deviceIndependentFlagsMask)
+            .intersection(supportedModifierFlags)
+    }
+
+    /// Renders modifiers as symbols in the fixed order ⌃ ⌥ ⇧ ⌘ fn.
+    private static func displayText(for modifierFlags: NSEvent.ModifierFlags) -> String {
+        let symbolsInDisplayOrder: [(flag: NSEvent.ModifierFlags, symbol: String)] = [
+            (.control, "⌃"),
+            (.option, "⌥"),
+            (.shift, "⇧"),
+            (.command, "⌘"),
+            (.function, "fn ")
+        ]
+
+        return symbolsInDisplayOrder
+            .filter { modifierFlags.contains($0.flag) }
+            .map { $0.symbol }
+            .joined()
+    }
+
+    /// Picks a label for the pressed key: a named key if known, otherwise the first
+    /// unmodified character uppercased, otherwise an empty string.
+    private static func keyDisplayText(for event: NSEvent) -> String {
+        if let namedKeyLabel = keyDisplayText(forKeyCode: event.keyCode), !namedKeyLabel.isEmpty {
+            return namedKeyLabel
+        }
+
+        guard let firstCharacter = event.charactersIgnoringModifiers?.first else {
+            return ""
+        }
+
+        return String(firstCharacter).uppercased()
+    }
+
+    /// Labels for keys identified by key code alone; `nil` for every other key.
+    private static func keyDisplayText(forKeyCode keyCode: UInt16) -> String? {
+        switch keyCode {
+        case 36: return "Return"
+        case 48: return "Tab"
+        case 49: return "Space"
+        case 51: return "Delete"
+        case 53: return "Esc"
+        case 76: return "Enter"
+        case 123: return "←"
+        case 124: return "→"
+        case 125: return "↓"
+        case 126: return "↑"
+        default: return nil
+        }
+    }
+}
diff --git a/leanring-buddy/CompanionManager.swift b/leanring-buddy/CompanionManager.swift
index 0234cf19..2a0be6db 100644
--- a/leanring-buddy/CompanionManager.swift
+++ b/leanring-buddy/CompanionManager.swift
@@ -64,7 +64,9 @@ final class CompanionManager: ObservableObject {
let buddyDictationManager = BuddyDictationManager()
let globalPushToTalkShortcutMonitor = GlobalPushToTalkShortcutMonitor()
+ let globalTextInputShortcutMonitor = GlobalTextInputShortcutMonitor()
let overlayWindowManager = OverlayWindowManager()
+ lazy var textCommandPopupManager = TextCommandPopupManager(companionManager: self)
// Response text is now displayed inline on the cursor overlay via
// streamingResponseText, so no separate response overlay manager is needed.
@@ -89,10 +91,12 @@ final class CompanionManager: ObservableObject {
private var currentResponseTask: Task?
private var shortcutTransitionCancellable: AnyCancellable?
+ private var textInputShortcutCancellable: AnyCancellable?
private var voiceStateCancellable: AnyCancellable?
private var audioPowerCancellable: AnyCancellable?
private var accessibilityCheckTimer: Timer?
private var pendingKeyboardShortcutStartTask: Task?
+ @Published private(set) var isTypedCommandSubmissionInFlight = false
/// Scheduled hide for transient cursor mode — cancelled if the user
/// speaks again before the delay elapses.
private var transientHideTask: Task?
@@ -116,6 +120,22 @@ final class CompanionManager: ObservableObject {
claudeAPI.model = model
}
+ @Published var textInputKeyboardShortcut: ClickyKeyboardShortcut = .persistedTextInputShortcut()
+
+ /// `true` while a typed submission is in flight, dictation is in progress,
+ /// or the voice state is listening or processing.
+ var isTextCommandSubmissionBusy: Bool {
+     if isTypedCommandSubmissionInFlight || buddyDictationManager.isDictationInProgress {
+         return true
+     }
+     return voiceState == .listening || voiceState == .processing
+ }
+
+ /// Applies a new typed command shortcut: publishes it, persists it, and hands
+ /// it to the global monitor. Shortcuts that fail validation are ignored.
+ func setTextInputKeyboardShortcut(_ shortcut: ClickyKeyboardShortcut) {
+     let isValidShortcut = shortcut.validationErrorMessage == nil
+     if !isValidShortcut { return }
+
+     textInputKeyboardShortcut = shortcut
+     shortcut.persistAsTextInputShortcut()
+     globalTextInputShortcutMonitor.currentShortcut = shortcut
+ }
+
/// User preference for whether the Clicky cursor should be shown.
/// When toggled off, the overlay is hidden and push-to-talk is disabled.
/// Persisted to UserDefaults so the choice survives app restarts.
@@ -179,6 +199,8 @@ final class CompanionManager: ObservableObject {
bindVoiceStateObservation()
bindAudioPowerLevel()
bindShortcutTransitions()
+ bindTextInputShortcut()
+ _ = textCommandPopupManager
// Eagerly touch the Claude API so its TLS warmup handshake completes
// well before the onboarding demo fires at ~40s into the video.
_ = claudeAPI
@@ -289,13 +311,16 @@ final class CompanionManager: ObservableObject {
func stop() {
globalPushToTalkShortcutMonitor.stop()
+ globalTextInputShortcutMonitor.stop()
buddyDictationManager.cancelCurrentDictation()
overlayWindowManager.hideOverlay()
+ textCommandPopupManager.hidePopup()
transientHideTask?.cancel()
currentResponseTask?.cancel()
currentResponseTask = nil
shortcutTransitionCancellable?.cancel()
+ textInputShortcutCancellable?.cancel()
voiceStateCancellable?.cancel()
audioPowerCancellable?.cancel()
accessibilityCheckTimer?.invalidate()
@@ -313,8 +338,10 @@ final class CompanionManager: ObservableObject {
if currentlyHasAccessibility {
globalPushToTalkShortcutMonitor.start()
+ globalTextInputShortcutMonitor.start()
} else {
globalPushToTalkShortcutMonitor.stop()
+ globalTextInputShortcutMonitor.stop()
}
hasScreenRecordingPermission = WindowPositionManager.hasScreenRecordingPermission()
@@ -470,6 +497,23 @@ final class CompanionManager: ObservableObject {
}
}
+ /// Subscribes to the global text input shortcut and forwards each trigger,
+ /// delivered on the main queue, to `handleTextInputShortcut()`.
+ private func bindTextInputShortcut() {
+     let shortcutTriggers = globalTextInputShortcutMonitor
+         .shortcutTriggeredPublisher
+         .receive(on: DispatchQueue.main)
+
+     textInputShortcutCancellable = shortcutTriggers.sink { [weak self] in
+         guard let self else { return }
+         self.handleTextInputShortcut()
+     }
+ }
+
+ /// Opens the typed command popup in response to the global shortcut.
+ /// Ignored until onboarding is complete and all permissions are granted,
+ /// and while the onboarding video is showing.
+ private func handleTextInputShortcut() {
+     let isReadyForCommands = hasCompletedOnboarding && allPermissionsGranted
+     if !isReadyForCommands || showOnboardingVideo { return }
+
+     // Close the menu bar panel before presenting the popup.
+     NotificationCenter.default.post(name: .clickyDismissPanel, object: nil)
+     textCommandPopupManager.showPopup()
+ }
+
private func handleShortcutTransition(_ transition: BuddyPushToTalkShortcut.ShortcutTransition) {
switch transition {
case .pressed:
@@ -490,23 +534,15 @@ final class CompanionManager: ObservableObject {
// Dismiss the menu bar panel so it doesn't cover the screen
NotificationCenter.default.post(name: .clickyDismissPanel, object: nil)
+ textCommandPopupManager.hidePopup()
// Cancel any in-progress response and TTS from a previous utterance
currentResponseTask?.cancel()
+ isTypedCommandSubmissionInFlight = false
elevenLabsTTSClient.stopPlayback()
clearDetectedElementLocation()
- // Dismiss the onboarding prompt if it's showing
- if showOnboardingPrompt {
- withAnimation(.easeOut(duration: 0.3)) {
- onboardingPromptOpacity = 0.0
- }
- DispatchQueue.main.asyncAfter(deadline: .now() + 0.35) {
- self.showOnboardingPrompt = false
- self.onboardingPromptText = ""
- }
- }
-
+ dismissOnboardingPromptIfNeeded()
ClickyAnalytics.trackPushToTalkStarted()
@@ -539,10 +575,52 @@ final class CompanionManager: ObservableObject {
}
}
+ /// Submits a typed command through the same transcript pipeline used for voice.
+ ///
+ /// The text is trimmed first. Empty input is ignored, and so is any submission
+ /// made while `isTextCommandSubmissionBusy` is true.
+ ///
+ /// - Parameter typedCommand: Raw text entered by the user.
+ func submitTypedCommand(_ typedCommand: String) {
+ let trimmedTypedCommand = typedCommand.trimmingCharacters(in: .whitespacesAndNewlines)
+ guard !trimmedTypedCommand.isEmpty else { return }
+ guard !isTextCommandSubmissionBusy else { return }
+
+ // Clear both the popup and the menu bar panel before the response starts.
+ textCommandPopupManager.hidePopup()
+ NotificationCenter.default.post(name: .clickyDismissPanel, object: nil)
+
+ // Drop any pending transient hide so the overlay is not hidden mid-response.
+ transientHideTask?.cancel()
+ transientHideTask = nil
+
+ // With the cursor preference off and no overlay on screen, show the overlay
+ // on every screen for this interaction.
+ if !isClickyCursorEnabled && !isOverlayVisible {
+ overlayWindowManager.hasShownOverlayBefore = true
+ overlayWindowManager.showOverlay(onScreens: NSScreen.screens, companionManager: self)
+ isOverlayVisible = true
+ }
+
+ clearDetectedElementLocation()
+ dismissOnboardingPromptIfNeeded()
+
+ isTypedCommandSubmissionInFlight = true
+ lastTranscript = trimmedTypedCommand
+ print("⌨️ Companion received typed command: \(trimmedTypedCommand)")
+ ClickyAnalytics.trackUserMessageSent(transcript: trimmedTypedCommand)
+
+ // NOTE(review): this completion clears the flag unconditionally. If an earlier,
+ // cancelled response task finishes after a newer submission has set the flag,
+ // the flag may be cleared early — confirm whether a per-submission token is needed.
+ sendTranscriptToClaudeWithScreenshot(transcript: trimmedTypedCommand) { [weak self] in
+ self?.isTypedCommandSubmissionInFlight = false
+ }
+ }
+
+ /// Fades out the onboarding prompt if it is showing, then clears its state
+ /// shortly after the fade finishes.
+ private func dismissOnboardingPromptIfNeeded() {
+     if !showOnboardingPrompt { return }
+
+     let fadeOutAnimation = Animation.easeOut(duration: 0.3)
+     withAnimation(fadeOutAnimation) {
+         onboardingPromptOpacity = 0.0
+     }
+
+     // The reset is scheduled slightly after the 0.3s fade.
+     let resetDeadline = DispatchTime.now() + 0.35
+     DispatchQueue.main.asyncAfter(deadline: resetDeadline) {
+         self.showOnboardingPrompt = false
+         self.onboardingPromptText = ""
+     }
+ }
+
// MARK: - Companion Prompt
private static let companionVoiceResponseSystemPrompt = """
- you're clicky, a friendly always-on companion that lives in the user's menu bar. the user just spoke to you via push-to-talk and you can see their screen(s). your reply will be spoken aloud via text-to-speech, so write the way you'd actually talk. this is an ongoing conversation — you remember everything they've said before.
+ you're clicky, a friendly always-on companion that lives in the user's menu bar. the user sent you a command by voice or text and you can see their screen(s). your reply will be spoken aloud via text-to-speech, so write the way you'd actually talk. this is an ongoing conversation — you remember everything they've said before.
rules:
- default to one or two sentences. be direct and dense. BUT if the user asks you to explain more, go deeper, or elaborate, then go all out — give a thorough, detailed explanation with no length limit.
@@ -583,11 +661,18 @@ final class CompanionManager: ObservableObject {
/// the spinner/processing state until TTS audio begins playing.
/// Claude's response may include a [POINT:x,y:label] tag which triggers
/// the buddy to fly to that element on screen.
- private func sendTranscriptToClaudeWithScreenshot(transcript: String) {
+ private func sendTranscriptToClaudeWithScreenshot(
+ transcript: String,
+ onCompletion: (@MainActor () -> Void)? = nil
+ ) {
currentResponseTask?.cancel()
elevenLabsTTSClient.stopPlayback()
currentResponseTask = Task {
+ defer {
+ onCompletion?()
+ }
+
// Stay in processing (spinner) state — no streaming text displayed
voiceState = .processing
diff --git a/leanring-buddy/CompanionPanelView.swift b/leanring-buddy/CompanionPanelView.swift
index 76789b4c..83d72132 100644
--- a/leanring-buddy/CompanionPanelView.swift
+++ b/leanring-buddy/CompanionPanelView.swift
@@ -8,11 +8,15 @@
//
import AVFoundation
+import AppKit
import SwiftUI
struct CompanionPanelView: View {
@ObservedObject var companionManager: CompanionManager
@State private var emailInput: String = ""
+ @State private var isRecordingTextHotkey = false
+ @State private var textHotkeyValidationMessage: String?
+ @State private var textHotkeyRecorderMonitor: Any?
var body: some View {
VStack(alignment: .leading, spacing: 0) {
@@ -31,6 +35,12 @@ struct CompanionPanelView: View {
modelPickerRow
.padding(.horizontal, 16)
+
+ Spacer()
+ .frame(height: 10)
+
+ textHotkeyRow
+ .padding(.horizontal, 16)
}
if !companionManager.allPermissionsGranted {
@@ -79,6 +89,9 @@ struct CompanionPanelView: View {
}
.frame(width: 320)
.background(panelBackground)
+ .onDisappear {
+ stopRecordingTextHotkey()
+ }
}
// MARK: - Header
@@ -596,6 +609,108 @@ struct CompanionPanelView: View {
.padding(.vertical, 4)
}
+ // MARK: - Text Hotkey
+
+ /// Settings row for the typed command hotkey: a title, a badge showing the
+ /// current shortcut (or a prompt while recording), a Record/Cancel button,
+ /// and a caption that shows either a validation message or a short hint.
+ private var textHotkeyRow: some View {
+ VStack(alignment: .leading, spacing: 6) {
+ HStack {
+ Text("Text Hotkey")
+ .font(.system(size: 13, weight: .medium))
+ .foregroundColor(DS.Colors.textSecondary)
+
+ Spacer()
+
+ // Badge: the current shortcut, or "Press keys..." while recording.
+ Text(isRecordingTextHotkey ? "Press keys..." : companionManager.textInputKeyboardShortcut.displayText)
+ .font(.system(size: 11, weight: .semibold))
+ .foregroundColor(isRecordingTextHotkey ? DS.Colors.blue400 : DS.Colors.textPrimary)
+ .padding(.horizontal, 8)
+ .padding(.vertical, 4)
+ .background(
+ RoundedRectangle(cornerRadius: 5, style: .continuous)
+ .fill(Color.white.opacity(0.08))
+ )
+ .overlay(
+ RoundedRectangle(cornerRadius: 5, style: .continuous)
+ .stroke(isRecordingTextHotkey ? DS.Colors.blue400.opacity(0.7) : DS.Colors.borderSubtle, lineWidth: 0.6)
+ )
+
+ // Starts recording, or cancels a recording that is in progress.
+ Button(action: {
+ toggleTextHotkeyRecording()
+ }) {
+ Text(isRecordingTextHotkey ? "Cancel" : "Record")
+ .font(.system(size: 11, weight: .semibold))
+ .foregroundColor(DS.Colors.textSecondary)
+ .padding(.horizontal, 9)
+ .padding(.vertical, 5)
+ .background(
+ Capsule()
+ .stroke(DS.Colors.borderSubtle, lineWidth: 0.8)
+ )
+ }
+ .buttonStyle(.plain)
+ .pointerCursor()
+ }
+
+ // Caption: the validation message takes priority over the default hint.
+ if let textHotkeyValidationMessage {
+ Text(textHotkeyValidationMessage)
+ .font(.system(size: 10))
+ .foregroundColor(DS.Colors.warning)
+ } else {
+ Text("Open typed input near the cursor.")
+ .font(.system(size: 10))
+ .foregroundColor(DS.Colors.textTertiary)
+ }
+ }
+ .padding(.vertical, 4)
+ }
+
+ /// Starts hotkey recording when idle; stops it when already recording.
+ private func toggleTextHotkeyRecording() {
+     guard isRecordingTextHotkey else {
+         startRecordingTextHotkey()
+         return
+     }
+     stopRecordingTextHotkey()
+ }
+
+ /// Begins capturing the next key-down as the new typed command hotkey.
+ ///
+ /// Installs a local key-down monitor. Esc cancels recording. A valid shortcut
+ /// is saved and ends recording. An invalid one shows its validation message
+ /// and keeps recording. Every key-down seen while recording is consumed.
+ private func startRecordingTextHotkey() {
+     stopRecordingTextHotkey()
+     isRecordingTextHotkey = true
+     textHotkeyValidationMessage = nil
+
+     textHotkeyRecorderMonitor = NSEvent.addLocalMonitorForEvents(matching: [.keyDown]) { keyDownEvent in
+         if !isRecordingTextHotkey { return keyDownEvent }
+
+         let escapeKeyCode: UInt16 = 53
+         if keyDownEvent.keyCode == escapeKeyCode {
+             stopRecordingTextHotkey()
+             return nil
+         }
+
+         guard let recordedShortcut = ClickyKeyboardShortcut.shortcut(from: keyDownEvent) else {
+             textHotkeyValidationMessage = "Choose a key with at least one modifier."
+             return nil
+         }
+
+         guard let rejectionReason = recordedShortcut.validationErrorMessage else {
+             // Valid shortcut: save it and finish recording.
+             companionManager.setTextInputKeyboardShortcut(recordedShortcut)
+             textHotkeyValidationMessage = nil
+             stopRecordingTextHotkey()
+             return nil
+         }
+
+         textHotkeyValidationMessage = rejectionReason
+         return nil
+     }
+ }
+
+ /// Leaves recording mode and removes the key-down monitor if one is installed.
+ private func stopRecordingTextHotkey() {
+     isRecordingTextHotkey = false
+
+     guard let installedMonitor = textHotkeyRecorderMonitor else { return }
+     NSEvent.removeMonitor(installedMonitor)
+     textHotkeyRecorderMonitor = nil
+ }
+
// MARK: - Model Picker
private var modelPickerRow: some View {
diff --git a/leanring-buddy/GlobalTextInputShortcutMonitor.swift b/leanring-buddy/GlobalTextInputShortcutMonitor.swift
new file mode 100644
index 00000000..e393483e
--- /dev/null
+++ b/leanring-buddy/GlobalTextInputShortcutMonitor.swift
@@ -0,0 +1,152 @@
+//
+// GlobalTextInputShortcutMonitor.swift
+// leanring-buddy
+//
+// Captures the typed command popup shortcut while Clicky is backgrounded.
+//
+
+import AppKit
+import Combine
+import CoreGraphics
+import Foundation
+
+/// Watches keyboard events through a listen-only `CGEvent` tap and publishes once
+/// each time `currentShortcut` is pressed.
+final class GlobalTextInputShortcutMonitor: ObservableObject {
+    /// Emits when the shortcut goes from released to pressed.
+    let shortcutTriggeredPublisher = PassthroughSubject<Void, Never>()
+
+    /// The shortcut being watched. Assigning a new value clears the pressed latch.
+    var currentShortcut: ClickyKeyboardShortcut {
+        didSet {
+            isShortcutCurrentlyPressed = false
+        }
+    }
+
+    private var globalEventTap: CFMachPort?
+    private var globalEventTapRunLoopSource: CFRunLoopSource?
+    /// Latch that suppresses repeated key-down events until the key or a modifier is released.
+    private var isShortcutCurrentlyPressed = false
+
+    /// - Parameter currentShortcut: Shortcut to watch; defaults to the persisted one.
+    init(currentShortcut: ClickyKeyboardShortcut = .persistedTextInputShortcut()) {
+        self.currentShortcut = currentShortcut
+    }
+
+    deinit {
+        stop()
+    }
+
+    /// Installs and enables the event tap on the main run loop.
+    ///
+    /// Does nothing when a tap already exists. Logs and returns when the tap or its
+    /// run loop source cannot be created.
+    func start() {
+        guard globalEventTap == nil else { return }
+
+        let monitoredEventTypes: [CGEventType] = [.flagsChanged, .keyDown, .keyUp]
+        let eventMask = monitoredEventTypes.reduce(CGEventMask(0)) { currentMask, eventType in
+            currentMask | (CGEventMask(1) << eventType.rawValue)
+        }
+
+        // The monitor is handed to the callback through `userInfo` as an unretained
+        // pointer and recovered here before forwarding the event.
+        let eventTapCallback: CGEventTapCallBack = { _, eventType, event, userInfo in
+            guard let userInfo else {
+                return Unmanaged.passUnretained(event)
+            }
+
+            let globalTextInputShortcutMonitor = Unmanaged<GlobalTextInputShortcutMonitor>
+                .fromOpaque(userInfo)
+                .takeUnretainedValue()
+
+            return globalTextInputShortcutMonitor.handleGlobalEventTap(
+                eventType: eventType,
+                event: event
+            )
+        }
+
+        guard let globalEventTap = CGEvent.tapCreate(
+            tap: .cgSessionEventTap,
+            place: .headInsertEventTap,
+            options: .listenOnly,
+            eventsOfInterest: eventMask,
+            callback: eventTapCallback,
+            userInfo: Unmanaged.passUnretained(self).toOpaque()
+        ) else {
+            print("⚠️ Global text input shortcut: couldn't create CGEvent tap")
+            return
+        }
+
+        guard let globalEventTapRunLoopSource = CFMachPortCreateRunLoopSource(
+            kCFAllocatorDefault,
+            globalEventTap,
+            0
+        ) else {
+            CFMachPortInvalidate(globalEventTap)
+            print("⚠️ Global text input shortcut: couldn't create event tap run loop source")
+            return
+        }
+
+        self.globalEventTap = globalEventTap
+        self.globalEventTapRunLoopSource = globalEventTapRunLoopSource
+
+        CFRunLoopAddSource(CFRunLoopGetMain(), globalEventTapRunLoopSource, .commonModes)
+        CGEvent.tapEnable(tap: globalEventTap, enable: true)
+    }
+
+    /// Removes the run loop source, invalidates the tap, and clears the pressed latch.
+    func stop() {
+        isShortcutCurrentlyPressed = false
+
+        if let globalEventTapRunLoopSource {
+            CFRunLoopRemoveSource(CFRunLoopGetMain(), globalEventTapRunLoopSource, .commonModes)
+            self.globalEventTapRunLoopSource = nil
+        }
+
+        if let globalEventTap {
+            CFMachPortInvalidate(globalEventTap)
+            self.globalEventTap = nil
+        }
+    }
+
+    /// Processes one tapped event and always passes it through unchanged.
+    ///
+    /// Re-enables the tap when the system reports it was disabled, publishes on the
+    /// first matching key-down, and clears the latch on key-up of the shortcut key or
+    /// when a modifier change leaves the shortcut's modifiers no longer all held.
+    private func handleGlobalEventTap(
+        eventType: CGEventType,
+        event: CGEvent
+    ) -> Unmanaged<CGEvent>? {
+        if eventType == .tapDisabledByTimeout || eventType == .tapDisabledByUserInput {
+            if let globalEventTap {
+                CGEvent.tapEnable(tap: globalEventTap, enable: true)
+            }
+            return Unmanaged.passUnretained(event)
+        }
+
+        let eventKeyCode = UInt16(event.getIntegerValueField(.keyboardEventKeycode))
+        let eventModifierFlags = NSEvent.ModifierFlags(rawValue: UInt(event.flags.rawValue))
+            .intersection(.deviceIndependentFlagsMask)
+        let eventMatchesCurrentShortcut = currentShortcut.matches(
+            keyCode: eventKeyCode,
+            modifierFlags: eventModifierFlags
+        )
+
+        if eventType == .keyDown && eventMatchesCurrentShortcut && !isShortcutCurrentlyPressed {
+            isShortcutCurrentlyPressed = true
+            shortcutTriggeredPublisher.send(())
+        }
+
+        if eventType == .keyUp && eventKeyCode == currentShortcut.keyCode {
+            isShortcutCurrentlyPressed = false
+        }
+
+        if eventType == .flagsChanged && !eventModifierFlags.isSuperset(of: currentShortcut.modifierFlags) {
+            isShortcutCurrentlyPressed = false
+        }
+
+        return Unmanaged.passUnretained(event)
+    }
+}
diff --git a/leanring-buddy/TextCommandPopupManager.swift b/leanring-buddy/TextCommandPopupManager.swift
new file mode 100644
index 00000000..a972faa4
--- /dev/null
+++ b/leanring-buddy/TextCommandPopupManager.swift
@@ -0,0 +1,342 @@
+//
+// TextCommandPopupManager.swift
+// leanring-buddy
+//
+// Manages the compact typed command popup opened by Clicky's text hotkey.
+//
+
+import AppKit
+import SwiftUI
+
+ /// Manages the compact typed-command popup: a borderless floating panel that
+ /// follows the mouse cursor while visible and is dismissed by Escape or by a
+ /// mouse click outside its frame.
+ @MainActor
+ final class TextCommandPopupManager: NSObject {
+     /// Borderless panel that is allowed to become key (so its text field can take
+     /// keyboard input) and reports Escape presses through `onEscapeKeyPressed`.
+     private final class TextCommandPanel: NSPanel {
+         var onEscapeKeyPressed: (() -> Void)?
+
+         override var canBecomeKey: Bool { true }
+         override var canBecomeMain: Bool { false }
+
+         override func keyDown(with event: NSEvent) {
+             guard event.keyCode == TextCommandPopupManager.escapeKeyCode else {
+                 super.keyDown(with: event)
+                 return
+             }
+
+             onEscapeKeyPressed?()
+         }
+     }
+
+     /// Virtual key code of the Escape key (`kVK_Escape`).
+     private static let escapeKeyCode: UInt16 = 53
+
+     private var panel: TextCommandPanel?
+     private var popupEventMonitors: [Any] = []
+     private var cursorTrackingTimer: Timer?
+
+     private let companionManager: CompanionManager
+     private let popupWidth: CGFloat = 360
+     private let popupHeight: CGFloat = 62
+     /// Minimum gap kept between the popup and the edges of the screen's visible frame.
+     private let screenEdgeMargin: CGFloat = 10
+     /// Offset of the popup's top-left corner relative to the cursor (right and below).
+     private let cursorOffset = CGPoint(x: 22, y: -18)
+     /// Interval between popup repositioning passes while the popup is visible.
+     private let cursorTrackingInterval: TimeInterval = 1.0 / 30.0
+
+     /// Creates the manager and eagerly builds the (hidden) popup panel.
+     ///
+     /// - Parameter companionManager: Model object handed to the popup's SwiftUI view.
+     init(companionManager: CompanionManager) {
+         self.companionManager = companionManager
+         super.init()
+         createPanelIfNeeded()
+     }
+
+     deinit {
+         // Invalidate the repeating timer; otherwise it stays scheduled on the run
+         // loop and keeps firing after this object has been deallocated.
+         cursorTrackingTimer?.invalidate()
+
+         for popupEventMonitor in popupEventMonitors {
+             NSEvent.removeMonitor(popupEventMonitor)
+         }
+     }
+
+     /// Shows the popup next to the cursor with fresh content, installs the
+     /// dismissal monitors, and starts following the cursor.
+     func showPopup() {
+         createPanelIfNeeded()
+         refreshPanelContent()
+         positionPopupNearCursor()
+
+         panel?.makeKeyAndOrderFront(nil)
+         panel?.orderFrontRegardless()
+         installPopupEventMonitors()
+         startTrackingCursor()
+     }
+
+     /// Hides the popup, stops following the cursor, and removes the dismissal monitors.
+     func hidePopup() {
+         stopTrackingCursor()
+         panel?.orderOut(nil)
+         removePopupEventMonitors()
+     }
+
+     /// Builds and configures the panel on first use; later calls are no-ops.
+     private func createPanelIfNeeded() {
+         guard panel == nil else { return }
+
+         let textCommandPanel = TextCommandPanel(
+             contentRect: NSRect(x: 0, y: 0, width: popupWidth, height: popupHeight),
+             styleMask: [.borderless, .nonactivatingPanel],
+             backing: .buffered,
+             defer: false
+         )
+
+         textCommandPanel.onEscapeKeyPressed = { [weak self] in
+             self?.hidePopup()
+         }
+         textCommandPanel.isFloatingPanel = true
+         textCommandPanel.level = .floating
+         textCommandPanel.isOpaque = false
+         textCommandPanel.backgroundColor = .clear
+         textCommandPanel.hasShadow = false
+         textCommandPanel.hidesOnDeactivate = false
+         textCommandPanel.isExcludedFromWindowsMenu = true
+         textCommandPanel.collectionBehavior = [.canJoinAllSpaces, .fullScreenAuxiliary]
+         textCommandPanel.isMovableByWindowBackground = false
+         textCommandPanel.titleVisibility = .hidden
+         textCommandPanel.titlebarAppearsTransparent = true
+
+         panel = textCommandPanel
+         refreshPanelContent()
+     }
+
+     /// Replaces the panel's content with a newly created hosting view, so each
+     /// presentation starts from a new `TextCommandPopupView` instance.
+     private func refreshPanelContent() {
+         let textCommandPopupView = TextCommandPopupView(companionManager: companionManager)
+             .frame(width: popupWidth)
+
+         let hostingView = NSHostingView(rootView: textCommandPopupView)
+         hostingView.frame = NSRect(x: 0, y: 0, width: popupWidth, height: popupHeight)
+         hostingView.wantsLayer = true
+         hostingView.layer?.backgroundColor = .clear
+
+         panel?.contentView = hostingView
+     }
+
+     /// Moves the panel just right of and below the cursor, clamped so it stays
+     /// inside the visible frame of the screen containing the cursor (falling back
+     /// to the main screen when no screen frame contains the cursor).
+     private func positionPopupNearCursor() {
+         guard let panel else { return }
+
+         let cursorLocation = NSEvent.mouseLocation
+         let screenContainingCursor = NSScreen.screens.first { screen in
+             screen.frame.contains(cursorLocation)
+         } ?? NSScreen.main
+
+         guard let targetScreenFrame = screenContainingCursor?.visibleFrame else { return }
+
+         let preferredOrigin = CGPoint(
+             x: cursorLocation.x + cursorOffset.x,
+             y: cursorLocation.y - popupHeight + cursorOffset.y
+         )
+
+         let clampedOrigin = CGPoint(
+             x: min(
+                 max(preferredOrigin.x, targetScreenFrame.minX + screenEdgeMargin),
+                 targetScreenFrame.maxX - popupWidth - screenEdgeMargin
+             ),
+             y: min(
+                 max(preferredOrigin.y, targetScreenFrame.minY + screenEdgeMargin),
+                 targetScreenFrame.maxY - popupHeight - screenEdgeMargin
+             )
+         )
+
+         let targetFrame = NSRect(
+             x: clampedOrigin.x,
+             y: clampedOrigin.y,
+             width: popupWidth,
+             height: popupHeight
+         )
+
+         // This runs on every tracking tick; skip the frame update and forced
+         // redisplay when the panel is already where it should be.
+         guard panel.frame != targetFrame else { return }
+         panel.setFrame(targetFrame, display: true)
+     }
+
+     /// Starts (or restarts) the repeating timer that keeps the popup next to the cursor.
+     private func startTrackingCursor() {
+         stopTrackingCursor()
+
+         cursorTrackingTimer = Timer.scheduledTimer(
+             withTimeInterval: cursorTrackingInterval,
+             repeats: true
+         ) { [weak self] _ in
+             // Hop onto the main actor explicitly before touching isolated state.
+             Task { @MainActor [weak self] in
+                 guard let self, self.panel?.isVisible == true else { return }
+                 self.positionPopupNearCursor()
+             }
+         }
+     }
+
+     /// Stops the cursor-following timer, if one is running.
+     private func stopTrackingCursor() {
+         cursorTrackingTimer?.invalidate()
+         cursorTrackingTimer = nil
+     }
+
+     /// Installs the monitors that dismiss the popup: a local Escape key monitor
+     /// plus global and local mouse-down monitors for clicks outside the panel.
+     private func installPopupEventMonitors() {
+         removePopupEventMonitors()
+
+         if let localEscapeKeyMonitor = NSEvent.addLocalMonitorForEvents(
+             matching: [.keyDown]
+         ) { [weak self] event in
+             if event.keyCode == TextCommandPopupManager.escapeKeyCode {
+                 self?.hidePopup()
+                 // Returning nil consumes the Escape press.
+                 return nil
+             }
+
+             return event
+         } {
+             popupEventMonitors.append(localEscapeKeyMonitor)
+         }
+
+         if let globalClickOutsideMonitor = NSEvent.addGlobalMonitorForEvents(
+             matching: [.leftMouseDown, .rightMouseDown]
+         ) { [weak self] _ in
+             self?.hidePopupIfNeeded(forClickAt: NSEvent.mouseLocation)
+         } {
+             popupEventMonitors.append(globalClickOutsideMonitor)
+         }
+
+         if let localClickOutsideMonitor = NSEvent.addLocalMonitorForEvents(
+             matching: [.leftMouseDown, .rightMouseDown]
+         ) { [weak self] event in
+             self?.hidePopupIfNeeded(forClickAt: NSEvent.mouseLocation)
+             return event
+         } {
+             popupEventMonitors.append(localClickOutsideMonitor)
+         }
+     }
+
+     /// Hides the popup when `clickLocation` (screen coordinates) falls outside the panel.
+     private func hidePopupIfNeeded(forClickAt clickLocation: CGPoint) {
+         guard let panel else { return }
+         guard !panel.frame.contains(clickLocation) else { return }
+
+         // Deferred because this is called from inside a monitor handler and
+         // hiding removes that monitor. Captured weakly so a pending dismissal
+         // does not extend the manager's lifetime.
+         DispatchQueue.main.async { [weak self] in
+             self?.hidePopup()
+         }
+     }
+
+     /// Removes every installed event monitor and forgets their tokens.
+     private func removePopupEventMonitors() {
+         for popupEventMonitor in popupEventMonitors {
+             NSEvent.removeMonitor(popupEventMonitor)
+         }
+         popupEventMonitors.removeAll()
+     }
+ }
+
+ /// SwiftUI content of the typed-command popup: a status icon beside a
+ /// single-line text field, drawn on a rounded, shadowed card with a gradient border.
+ private struct TextCommandPopupView: View {
+     @ObservedObject var companionManager: CompanionManager
+     @State private var typedCommandText = ""
+
+     /// Whether the companion manager reports that a text command is in flight.
+     private var isBusy: Bool {
+         companionManager.isTextCommandSubmissionBusy
+     }
+
+     var body: some View {
+         HStack(spacing: 10) {
+             statusIcon
+             commandField
+         }
+         .padding(.horizontal, 16)
+         .padding(.vertical, 11)
+         .background(cardBackground)
+         .overlay(cardBorder)
+         .padding(5)
+         .onAppear {
+             // Start every appearance with an empty field.
+             typedCommandText = ""
+         }
+     }
+
+     /// Leading icon: an hourglass while busy, a speech bubble otherwise.
+     private var statusIcon: some View {
+         Image(systemName: isBusy ? "hourglass" : "bubble.left")
+             .font(.system(size: 15, weight: .semibold))
+             .foregroundColor(DS.Colors.blue300)
+             .frame(width: 18)
+     }
+
+     /// The text field itself; disabled and relabelled while a command is in flight.
+     private var commandField: some View {
+         PopupTextField(
+             text: $typedCommandText,
+             placeholder: isBusy ? "Clicky is busy..." : "Ask Clicky...",
+             isEnabled: !isBusy,
+             onSubmit: {
+                 submitTypedCommandIfPossible()
+             }
+         )
+         .frame(height: 22)
+         .overlay(IBeamCursorView())
+     }
+
+     /// Filled rounded rectangle with a dark drop shadow and a faint blue glow.
+     private var cardBackground: some View {
+         RoundedRectangle(cornerRadius: 18, style: .continuous)
+             .fill(DS.Colors.background.opacity(0.94))
+             .shadow(color: Color.black.opacity(0.48), radius: 18, x: 0, y: 9)
+             .shadow(color: DS.Colors.blue500.opacity(0.10), radius: 12, x: 0, y: 0)
+     }
+
+     /// Thin gradient stroke running from the top-leading to the bottom-trailing corner.
+     private var cardBorder: some View {
+         let borderGradient = LinearGradient(
+             colors: [
+                 Color.white.opacity(0.22),
+                 DS.Colors.blue400.opacity(0.18),
+                 Color.white.opacity(0.08)
+             ],
+             startPoint: .topLeading,
+             endPoint: .bottomTrailing
+         )
+
+         return RoundedRectangle(cornerRadius: 18, style: .continuous)
+             .stroke(borderGradient, lineWidth: 0.9)
+     }
+
+     /// Submits the trimmed text unless it is empty or a command is already in flight.
+     private func submitTypedCommandIfPossible() {
+         let trimmedCommand = typedCommandText.trimmingCharacters(in: .whitespacesAndNewlines)
+         guard !trimmedCommand.isEmpty, !isBusy else { return }
+         companionManager.submitTypedCommand(trimmedCommand)
+     }
+ }
+
+ /// AppKit-backed single-line text field for the popup. It mirrors its contents
+ /// into `text`, calls `onSubmit` when the field's action fires, and keeps
+ /// requesting first-responder status while enabled.
+ private struct PopupTextField: NSViewRepresentable {
+     @Binding var text: String
+     let placeholder: String
+     let isEnabled: Bool
+     let onSubmit: () -> Void
+
+     /// Font shared by the typed text and the placeholder.
+     private static var fieldFont: NSFont {
+         .systemFont(ofSize: 15, weight: .medium)
+     }
+
+     /// Colour of the typed text.
+     private static var textColor: NSColor {
+         NSColor(calibratedRed: 0.925, green: 0.933, blue: 0.929, alpha: 1.0)
+     }
+
+     /// Colour of the placeholder text.
+     private static var placeholderColor: NSColor {
+         NSColor(calibratedRed: 0.671, green: 0.710, blue: 0.698, alpha: 1.0)
+     }
+
+     func makeNSView(context: Context) -> NSTextField {
+         let textField = NSTextField()
+         textField.delegate = context.coordinator
+         textField.target = context.coordinator
+         textField.action = #selector(Coordinator.submitTextField)
+         textField.isBezeled = false
+         textField.isBordered = false
+         textField.drawsBackground = false
+         textField.backgroundColor = .clear
+         textField.focusRingType = .none
+         textField.lineBreakMode = .byTruncatingTail
+         textField.usesSingleLineMode = true
+         textField.cell?.wraps = false
+         textField.cell?.isScrollable = true
+         textField.font = Self.fieldFont
+         textField.textColor = Self.textColor
+
+         Self.requestFocus(for: textField)
+
+         return textField
+     }
+
+     func updateNSView(_ textField: NSTextField, context: Context) {
+         // Keep the coordinator pointing at the latest binding and callback.
+         context.coordinator.text = $text
+         context.coordinator.onSubmit = onSubmit
+
+         if textField.stringValue != text {
+             textField.stringValue = text
+         }
+
+         // Rebuild the attributed placeholder only when its text actually changes.
+         if textField.placeholderAttributedString?.string != placeholder {
+             textField.placeholderAttributedString = NSAttributedString(
+                 string: placeholder,
+                 attributes: [
+                     .foregroundColor: Self.placeholderColor,
+                     .font: Self.fieldFont
+                 ]
+             )
+         }
+         textField.isEnabled = isEnabled
+
+         // Ask for focus again whenever the window's first responder is not this
+         // field's editor.
+         if isEnabled && textField.window?.firstResponder !== textField.currentEditor() {
+             Self.requestFocus(for: textField)
+         }
+     }
+
+     func makeCoordinator() -> Coordinator {
+         Coordinator(text: $text, onSubmit: onSubmit)
+     }
+
+     /// Asks the field's window, on the next main-queue turn, to make the field
+     /// first responder. Does nothing if the field has no window by then.
+     private static func requestFocus(for textField: NSTextField) {
+         DispatchQueue.main.async {
+             textField.window?.makeFirstResponder(textField)
+         }
+     }
+
+     /// Delegate and action target that bridges `NSTextField` events back to SwiftUI.
+     final class Coordinator: NSObject, NSTextFieldDelegate {
+         var text: Binding<String>
+         var onSubmit: () -> Void
+
+         init(text: Binding<String>, onSubmit: @escaping () -> Void) {
+             self.text = text
+             self.onSubmit = onSubmit
+         }
+
+         /// Copies the field's current string into the SwiftUI binding on every edit.
+         func controlTextDidChange(_ notification: Notification) {
+             guard let textField = notification.object as? NSTextField else { return }
+             text.wrappedValue = textField.stringValue
+         }
+
+         /// Action method of the text field; forwards to `onSubmit`.
+         @objc func submitTextField() {
+             onSubmit()
+         }
+     }
+ }
diff --git a/leanring-buddyTests/leanring_buddyTests.swift b/leanring-buddyTests/leanring_buddyTests.swift
index 188fe7ae..d488c25a 100644
--- a/leanring-buddyTests/leanring_buddyTests.swift
+++ b/leanring-buddyTests/leanring_buddyTests.swift
@@ -6,6 +6,7 @@
//
import Testing
+import AppKit
@testable import leanring_buddy
struct leanring_buddyTests {
@@ -37,4 +38,24 @@ struct leanring_buddyTests {
#expect(shouldTreatPermissionAsGranted)
}
+ /// The default text-input shortcut is ⌥⌘K and reports no validation error.
+ @Test func defaultTextInputShortcutUsesOptionCommandK() async throws {
+     let defaultShortcut = ClickyKeyboardShortcut.defaultTextInputShortcut
+     let defaultModifiers = defaultShortcut.modifierFlags
+
+     // 40 is the virtual key code for "K" (kVK_ANSI_K).
+     #expect(defaultShortcut.keyCode == 40)
+     #expect(defaultModifiers.contains(.option))
+     #expect(defaultModifiers.contains(.command))
+     #expect(defaultShortcut.displayText == "⌥⌘K")
+     #expect(defaultShortcut.validationErrorMessage == nil)
+ }
+
+ /// A shortcut using Control+Option is rejected with the "reserved for voice" message.
+ @Test func textInputShortcutRejectsVoiceShortcutConflict() async throws {
+     // 49 is the virtual key code for Space (kVK_Space).
+     let conflictingShortcut = ClickyKeyboardShortcut(
+         keyCode: 49,
+         modifierFlags: [.control, .option],
+         keyDisplayText: "Space"
+     )
+     let expectedMessage = "Control+Option is reserved for voice."
+
+     #expect(conflictingShortcut.validationErrorMessage == expectedMessage)
+ }
+
}