diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 00000000..b58b603f --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,5 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/clicky.iml b/.idea/clicky.iml new file mode 100644 index 00000000..24643cc3 --- /dev/null +++ b/.idea/clicky.iml @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 00000000..7d1a270d --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 00000000..35eb1ddf --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md index 6946d441..cb2b7e60 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,6 +19,7 @@ All API keys live on a Cloudflare Worker proxy — nothing sensitive ships in th - **Text-to-Speech**: ElevenLabs (`eleven_flash_v2_5` model) via Cloudflare Worker proxy - **Screen Capture**: ScreenCaptureKit (macOS 14.2+), multi-monitor support - **Voice Input**: Push-to-talk via `AVAudioEngine` + pluggable transcription-provider layer. System-wide keyboard shortcut via listen-only CGEvent tap. +- **Text Input**: Customizable global hotkey (`⌥⌘K` by default) opens a compact typed-command popup near the cursor. Typed commands reuse the same screenshot → Claude → TTS/pointing pipeline as voice. - **Element Pointing**: Claude embeds `[POINT:x,y:label:screenN]` tags in responses. The overlay parses these, maps coordinates to the correct monitor, and animates the blue cursor along a bezier arc to the target. 
- **Concurrency**: `@MainActor` isolation, async/await throughout - **Analytics**: PostHog via `ClickyAnalytics.swift` @@ -44,6 +45,8 @@ Worker vars: `ELEVENLABS_VOICE_ID` **Global Push-To-Talk Shortcut**: Background push-to-talk uses a listen-only `CGEvent` tap instead of an AppKit global monitor so modifier-based shortcuts like `ctrl + option` are detected more reliably while the app is running in the background. +**Global Text Input Shortcut**: Background typed input uses a sibling listen-only `CGEvent` tap with a persisted shortcut model. The popup is a lightweight `NSPanel` near the cursor, and submitted text feeds the same core response pipeline as voice transcripts. + **Shared URLSession for AssemblyAI**: A single long-lived `URLSession` is shared across all AssemblyAI streaming sessions (owned by the provider, not the session). Creating and invalidating a URLSession per session corrupts the OS connection pool and causes "Socket is not connected" errors after a few rapid reconnections. **Transient Cursor Mode**: When "Show Clicky" is off, pressing the hotkey fades in the cursor overlay for the duration of the interaction (recording → response → TTS → optional pointing), then fades it out automatically after 1 second of inactivity. @@ -53,9 +56,9 @@ Worker vars: `ELEVENLABS_VOICE_ID` | File | Lines | Purpose | |------|-------|---------| | `leanring_buddyApp.swift` | ~89 | Menu bar app entry point. Uses `@NSApplicationDelegateAdaptor` with `CompanionAppDelegate` which creates `MenuBarPanelManager` and starts `CompanionManager`. No main window — the app lives entirely in the status bar. | -| `CompanionManager.swift` | ~1026 | Central state machine. Owns dictation, shortcut monitoring, screen capture, Claude API, ElevenLabs TTS, and overlay management. Tracks voice state (idle/listening/processing/responding), conversation history, model selection, and cursor visibility. Coordinates the full push-to-talk → screenshot → Claude → TTS → pointing pipeline. 
| +| `CompanionManager.swift` | ~1111 | Central state machine. Owns dictation, voice/text shortcut monitoring, text popup orchestration, screen capture, Claude API, ElevenLabs TTS, and overlay management. Tracks voice state (idle/listening/processing/responding), conversation history, model selection, and cursor visibility. Coordinates voice and typed command → screenshot → Claude → TTS → pointing pipeline. | | `MenuBarPanelManager.swift` | ~243 | NSStatusItem + custom NSPanel lifecycle. Creates the menu bar icon, manages the floating companion panel (show/hide/position), installs click-outside-to-dismiss monitor. | -| `CompanionPanelView.swift` | ~761 | SwiftUI panel content for the menu bar dropdown. Shows companion status, push-to-talk instructions, model picker (Sonnet/Opus), permissions UI, DM feedback button, and quit button. Dark aesthetic using `DS` design system. | +| `CompanionPanelView.swift` | ~876 | SwiftUI panel content for the menu bar dropdown. Shows companion status, push-to-talk instructions, model picker (Sonnet/Opus), text hotkey settings, permissions UI, DM feedback button, and quit button. Dark aesthetic using `DS` design system. | | `OverlayWindow.swift` | ~881 | Full-screen transparent overlay hosting the blue cursor, response text, waveform, and spinner. Handles cursor animation, element pointing with bezier arcs, multi-monitor coordinate mapping, and fade-out transitions. | | `CompanionResponseOverlay.swift` | ~217 | SwiftUI view for the response text bubble and waveform displayed next to the cursor in the overlay. | | `CompanionScreenCaptureUtility.swift` | ~132 | Multi-monitor screenshot capture using ScreenCaptureKit. Returns labeled image data for each connected display. | @@ -66,6 +69,9 @@ Worker vars: `ELEVENLABS_VOICE_ID` | `AppleSpeechTranscriptionProvider.swift` | ~147 | Local fallback transcription provider backed by Apple's Speech framework. | | `BuddyAudioConversionSupport.swift` | ~108 | Audio conversion helpers. 
Converts live mic buffers to PCM16 mono audio and builds WAV payloads for upload-based providers. | | `GlobalPushToTalkShortcutMonitor.swift` | ~132 | System-wide push-to-talk monitor. Owns the listen-only `CGEvent` tap and publishes press/release transitions. | +| `ClickyKeyboardShortcut.swift` | ~178 | Persisted keyboard shortcut model for typed command hotkeys, including display text and validation. | +| `GlobalTextInputShortcutMonitor.swift` | ~134 | System-wide typed command shortcut monitor. Owns a listen-only `CGEvent` tap and publishes trigger events for the popup. | +| `TextCommandPopupManager.swift` | ~342 | Compact typed-command `NSPanel` manager and SwiftUI popup view. Tracks near the cursor, focuses input, submits on Enter, and closes on Esc/outside click. | | `ClaudeAPI.swift` | ~291 | Claude vision API client with streaming (SSE) and non-streaming modes. TLS warmup optimization, image MIME detection, conversation history support. | | `OpenAIAPI.swift` | ~142 | OpenAI GPT vision API client. | | `ElevenLabsTTSClient.swift` | ~81 | ElevenLabs TTS client. Sends text to the Worker proxy, plays back audio via `AVAudioPlayer`. Exposes `isPlaying` for transient cursor scheduling. 
| diff --git a/leanring-buddy.xcodeproj/project.pbxproj b/leanring-buddy.xcodeproj/project.pbxproj index 75e57261..3139a9d6 100644 --- a/leanring-buddy.xcodeproj/project.pbxproj +++ b/leanring-buddy.xcodeproj/project.pbxproj @@ -34,9 +34,22 @@ 28F22CD62F56440300A0FC59 /* leanring-buddyUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = "leanring-buddyUITests.xctest"; sourceTree = BUILT_PRODUCTS_DIR; }; /* End PBXFileReference section */ +/* Begin PBXFileSystemSynchronizedBuildFileExceptionSet section */ + AA00BB072F6500070039DA55 /* Exceptions for "leanring-buddy" folder in "leanring-buddy" target */ = { + isa = PBXFileSystemSynchronizedBuildFileExceptionSet; + membershipExceptions = ( + Info.plist, + ); + target = 28F22CBE2F56440300A0FC59 /* leanring-buddy */; + }; +/* End PBXFileSystemSynchronizedBuildFileExceptionSet section */ + /* Begin PBXFileSystemSynchronizedRootGroup section */ 28F22CC12F56440300A0FC59 /* leanring-buddy */ = { isa = PBXFileSystemSynchronizedRootGroup; + exceptions = ( + AA00BB072F6500070039DA55 /* Exceptions for "leanring-buddy" folder in "leanring-buddy" target */, + ); path = "leanring-buddy"; sourceTree = ""; }; @@ -411,7 +424,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; CURRENT_PROJECT_VERSION = 1; - DEVELOPMENT_TEAM = 2UDAY4J48G; + DEVELOPMENT_TEAM = NDJZK3L926; ENABLE_APP_SANDBOX = NO; ENABLE_HARDENED_RUNTIME = YES; ENABLE_OUTGOING_NETWORK_CONNECTIONS = YES; @@ -449,7 +462,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; CURRENT_PROJECT_VERSION = 1; - DEVELOPMENT_TEAM = 2UDAY4J48G; + DEVELOPMENT_TEAM = NDJZK3L926; ENABLE_APP_SANDBOX = NO; ENABLE_HARDENED_RUNTIME = YES; ENABLE_OUTGOING_NETWORK_CONNECTIONS = YES; diff --git a/leanring-buddy.xcodeproj/xcuserdata/abdul-aleem.xcuserdatad/xcschemes/xcschememanagement.plist b/leanring-buddy.xcodeproj/xcuserdata/abdul-aleem.xcuserdatad/xcschemes/xcschememanagement.plist new file mode 100644 index 
00000000..6c13490a --- /dev/null +++ b/leanring-buddy.xcodeproj/xcuserdata/abdul-aleem.xcuserdatad/xcschemes/xcschememanagement.plist @@ -0,0 +1,14 @@ + + + + + SchemeUserState + + leanring-buddy.xcscheme_^#shared#^_ + + orderHint + 0 + + + + diff --git a/leanring-buddy/ClickyKeyboardShortcut.swift b/leanring-buddy/ClickyKeyboardShortcut.swift new file mode 100644 index 00000000..39206405 --- /dev/null +++ b/leanring-buddy/ClickyKeyboardShortcut.swift @@ -0,0 +1,180 @@ +// +// ClickyKeyboardShortcut.swift +// leanring-buddy +// +// Persisted keyboard shortcut model for Clicky's typed command popup. +// + +import AppKit +import Foundation + +struct ClickyKeyboardShortcut: Codable, Equatable { + static let textInputUserDefaultsKey = "textInputKeyboardShortcut" + static let defaultTextInputShortcut = ClickyKeyboardShortcut( + keyCode: 40, + modifierFlags: [.option, .command], + keyDisplayText: "K" + ) + private static let supportedModifierFlags: NSEvent.ModifierFlags = [ + .control, + .option, + .shift, + .command, + .function + ] + + let keyCode: UInt16 + let modifierFlagsRawValue: UInt + let keyDisplayText: String + + var modifierFlags: NSEvent.ModifierFlags { + NSEvent.ModifierFlags(rawValue: modifierFlagsRawValue) + .intersection(.deviceIndependentFlagsMask) + .intersection(Self.supportedModifierFlags) + } + + var displayText: String { + let modifierDisplayText = Self.displayText(for: modifierFlags) + guard !modifierDisplayText.isEmpty else { return keyDisplayText } + return modifierDisplayText + keyDisplayText + } + + var validationErrorMessage: String? { + if keyDisplayText.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + return "Choose a key with at least one modifier." + } + + if modifierFlags.isEmpty { + return "Add at least one modifier." + } + + if modifierFlags.contains(.control) && modifierFlags.contains(.option) { + return "Control+Option is reserved for voice." 
+ } + + return nil + } + + init(keyCode: UInt16, modifierFlags: NSEvent.ModifierFlags, keyDisplayText: String) { + self.keyCode = keyCode + self.modifierFlagsRawValue = modifierFlags + .intersection(.deviceIndependentFlagsMask) + .intersection(Self.supportedModifierFlags) + .rawValue + self.keyDisplayText = keyDisplayText + } + + static func persistedTextInputShortcut() -> ClickyKeyboardShortcut { + guard let data = UserDefaults.standard.data(forKey: textInputUserDefaultsKey), + let shortcut = try? JSONDecoder().decode(ClickyKeyboardShortcut.self, from: data), + shortcut.validationErrorMessage == nil else { + return defaultTextInputShortcut + } + + return shortcut + } + + func persistAsTextInputShortcut() { + guard let data = try? JSONEncoder().encode(self) else { return } + UserDefaults.standard.set(data, forKey: Self.textInputUserDefaultsKey) + } + + func matches(keyCode eventKeyCode: UInt16, modifierFlags eventModifierFlags: NSEvent.ModifierFlags) -> Bool { + eventKeyCode == keyCode + && eventModifierFlags + .intersection(.deviceIndependentFlagsMask) + .isSuperset(of: modifierFlags) + } + + static func shortcut(from event: NSEvent) -> ClickyKeyboardShortcut? { + guard event.type == .keyDown else { return nil } + + let keyDisplayText = keyDisplayText(for: event) + guard !keyDisplayText.isEmpty else { return nil } + + return ClickyKeyboardShortcut( + keyCode: event.keyCode, + modifierFlags: event.modifierFlags.intersection(.deviceIndependentFlagsMask), + keyDisplayText: keyDisplayText + ) + } + + static func shortcut( + keyCode: UInt16, + modifierFlagsRawValue: UInt64 + ) -> ClickyKeyboardShortcut? 
{ + guard let keyDisplayText = keyDisplayText(forKeyCode: keyCode), + !keyDisplayText.isEmpty else { + return nil + } + + return ClickyKeyboardShortcut( + keyCode: keyCode, + modifierFlags: NSEvent.ModifierFlags(rawValue: UInt(modifierFlagsRawValue)) + .intersection(.deviceIndependentFlagsMask), + keyDisplayText: keyDisplayText + ) + } + + private static func displayText(for modifierFlags: NSEvent.ModifierFlags) -> String { + var displayText = "" + + if modifierFlags.contains(.control) { + displayText += "⌃" + } + if modifierFlags.contains(.option) { + displayText += "⌥" + } + if modifierFlags.contains(.shift) { + displayText += "⇧" + } + if modifierFlags.contains(.command) { + displayText += "⌘" + } + if modifierFlags.contains(.function) { + displayText += "fn " + } + + return displayText + } + + private static func keyDisplayText(for event: NSEvent) -> String { + if let specialKey = keyDisplayText(forKeyCode: event.keyCode), !specialKey.isEmpty { + return specialKey + } + + if let charactersIgnoringModifiers = event.charactersIgnoringModifiers, + let firstCharacter = charactersIgnoringModifiers.first { + return String(firstCharacter).uppercased() + } + + return "" + } + + private static func keyDisplayText(forKeyCode keyCode: UInt16) -> String? 
{ + switch keyCode { + case 36: + return "Return" + case 48: + return "Tab" + case 49: + return "Space" + case 51: + return "Delete" + case 53: + return "Esc" + case 76: + return "Enter" + case 123: + return "←" + case 124: + return "→" + case 125: + return "↓" + case 126: + return "↑" + default: + return nil + } + } +} diff --git a/leanring-buddy/CompanionManager.swift b/leanring-buddy/CompanionManager.swift index 0234cf19..2a0be6db 100644 --- a/leanring-buddy/CompanionManager.swift +++ b/leanring-buddy/CompanionManager.swift @@ -64,7 +64,9 @@ final class CompanionManager: ObservableObject { let buddyDictationManager = BuddyDictationManager() let globalPushToTalkShortcutMonitor = GlobalPushToTalkShortcutMonitor() + let globalTextInputShortcutMonitor = GlobalTextInputShortcutMonitor() let overlayWindowManager = OverlayWindowManager() + lazy var textCommandPopupManager = TextCommandPopupManager(companionManager: self) // Response text is now displayed inline on the cursor overlay via // streamingResponseText, so no separate response overlay manager is needed. @@ -89,10 +91,12 @@ final class CompanionManager: ObservableObject { private var currentResponseTask: Task? private var shortcutTransitionCancellable: AnyCancellable? + private var textInputShortcutCancellable: AnyCancellable? private var voiceStateCancellable: AnyCancellable? private var audioPowerCancellable: AnyCancellable? private var accessibilityCheckTimer: Timer? private var pendingKeyboardShortcutStartTask: Task? + @Published private(set) var isTypedCommandSubmissionInFlight = false /// Scheduled hide for transient cursor mode — cancelled if the user /// speaks again before the delay elapses. private var transientHideTask: Task? 
@@ -116,6 +120,22 @@ final class CompanionManager: ObservableObject { claudeAPI.model = model } + @Published var textInputKeyboardShortcut: ClickyKeyboardShortcut = .persistedTextInputShortcut() + + var isTextCommandSubmissionBusy: Bool { + isTypedCommandSubmissionInFlight + || buddyDictationManager.isDictationInProgress + || voiceState == .listening + || voiceState == .processing + } + + func setTextInputKeyboardShortcut(_ shortcut: ClickyKeyboardShortcut) { + guard shortcut.validationErrorMessage == nil else { return } + textInputKeyboardShortcut = shortcut + shortcut.persistAsTextInputShortcut() + globalTextInputShortcutMonitor.currentShortcut = shortcut + } + /// User preference for whether the Clicky cursor should be shown. /// When toggled off, the overlay is hidden and push-to-talk is disabled. /// Persisted to UserDefaults so the choice survives app restarts. @@ -179,6 +199,8 @@ final class CompanionManager: ObservableObject { bindVoiceStateObservation() bindAudioPowerLevel() bindShortcutTransitions() + bindTextInputShortcut() + _ = textCommandPopupManager // Eagerly touch the Claude API so its TLS warmup handshake completes // well before the onboarding demo fires at ~40s into the video. 
_ = claudeAPI @@ -289,13 +311,16 @@ final class CompanionManager: ObservableObject { func stop() { globalPushToTalkShortcutMonitor.stop() + globalTextInputShortcutMonitor.stop() buddyDictationManager.cancelCurrentDictation() overlayWindowManager.hideOverlay() + textCommandPopupManager.hidePopup() transientHideTask?.cancel() currentResponseTask?.cancel() currentResponseTask = nil shortcutTransitionCancellable?.cancel() + textInputShortcutCancellable?.cancel() voiceStateCancellable?.cancel() audioPowerCancellable?.cancel() accessibilityCheckTimer?.invalidate() @@ -313,8 +338,10 @@ final class CompanionManager: ObservableObject { if currentlyHasAccessibility { globalPushToTalkShortcutMonitor.start() + globalTextInputShortcutMonitor.start() } else { globalPushToTalkShortcutMonitor.stop() + globalTextInputShortcutMonitor.stop() } hasScreenRecordingPermission = WindowPositionManager.hasScreenRecordingPermission() @@ -470,6 +497,23 @@ final class CompanionManager: ObservableObject { } } + private func bindTextInputShortcut() { + textInputShortcutCancellable = globalTextInputShortcutMonitor + .shortcutTriggeredPublisher + .receive(on: DispatchQueue.main) + .sink { [weak self] in + self?.handleTextInputShortcut() + } + } + + private func handleTextInputShortcut() { + guard hasCompletedOnboarding && allPermissionsGranted else { return } + guard !showOnboardingVideo else { return } + + NotificationCenter.default.post(name: .clickyDismissPanel, object: nil) + textCommandPopupManager.showPopup() + } + private func handleShortcutTransition(_ transition: BuddyPushToTalkShortcut.ShortcutTransition) { switch transition { case .pressed: @@ -490,23 +534,15 @@ final class CompanionManager: ObservableObject { // Dismiss the menu bar panel so it doesn't cover the screen NotificationCenter.default.post(name: .clickyDismissPanel, object: nil) + textCommandPopupManager.hidePopup() // Cancel any in-progress response and TTS from a previous utterance currentResponseTask?.cancel() + 
isTypedCommandSubmissionInFlight = false elevenLabsTTSClient.stopPlayback() clearDetectedElementLocation() - // Dismiss the onboarding prompt if it's showing - if showOnboardingPrompt { - withAnimation(.easeOut(duration: 0.3)) { - onboardingPromptOpacity = 0.0 - } - DispatchQueue.main.asyncAfter(deadline: .now() + 0.35) { - self.showOnboardingPrompt = false - self.onboardingPromptText = "" - } - } - + dismissOnboardingPromptIfNeeded() ClickyAnalytics.trackPushToTalkStarted() @@ -539,10 +575,52 @@ final class CompanionManager: ObservableObject { } } + func submitTypedCommand(_ typedCommand: String) { + let trimmedTypedCommand = typedCommand.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmedTypedCommand.isEmpty else { return } + guard !isTextCommandSubmissionBusy else { return } + + textCommandPopupManager.hidePopup() + NotificationCenter.default.post(name: .clickyDismissPanel, object: nil) + + transientHideTask?.cancel() + transientHideTask = nil + + if !isClickyCursorEnabled && !isOverlayVisible { + overlayWindowManager.hasShownOverlayBefore = true + overlayWindowManager.showOverlay(onScreens: NSScreen.screens, companionManager: self) + isOverlayVisible = true + } + + clearDetectedElementLocation() + dismissOnboardingPromptIfNeeded() + + isTypedCommandSubmissionInFlight = true + lastTranscript = trimmedTypedCommand + print("⌨️ Companion received typed command: \(trimmedTypedCommand)") + ClickyAnalytics.trackUserMessageSent(transcript: trimmedTypedCommand) + + sendTranscriptToClaudeWithScreenshot(transcript: trimmedTypedCommand) { [weak self] in + self?.isTypedCommandSubmissionInFlight = false + } + } + + private func dismissOnboardingPromptIfNeeded() { + guard showOnboardingPrompt else { return } + + withAnimation(.easeOut(duration: 0.3)) { + onboardingPromptOpacity = 0.0 + } + DispatchQueue.main.asyncAfter(deadline: .now() + 0.35) { + self.showOnboardingPrompt = false + self.onboardingPromptText = "" + } + } + // MARK: - Companion Prompt private 
static let companionVoiceResponseSystemPrompt = """ - you're clicky, a friendly always-on companion that lives in the user's menu bar. the user just spoke to you via push-to-talk and you can see their screen(s). your reply will be spoken aloud via text-to-speech, so write the way you'd actually talk. this is an ongoing conversation — you remember everything they've said before. + you're clicky, a friendly always-on companion that lives in the user's menu bar. the user sent you a command by voice or text and you can see their screen(s). your reply will be spoken aloud via text-to-speech, so write the way you'd actually talk. this is an ongoing conversation — you remember everything they've said before. rules: - default to one or two sentences. be direct and dense. BUT if the user asks you to explain more, go deeper, or elaborate, then go all out — give a thorough, detailed explanation with no length limit. @@ -583,11 +661,18 @@ final class CompanionManager: ObservableObject { /// the spinner/processing state until TTS audio begins playing. /// Claude's response may include a [POINT:x,y:label] tag which triggers /// the buddy to fly to that element on screen. - private func sendTranscriptToClaudeWithScreenshot(transcript: String) { + private func sendTranscriptToClaudeWithScreenshot( + transcript: String, + onCompletion: (@MainActor () -> Void)? 
= nil + ) { currentResponseTask?.cancel() elevenLabsTTSClient.stopPlayback() currentResponseTask = Task { + defer { + onCompletion?() + } + // Stay in processing (spinner) state — no streaming text displayed voiceState = .processing diff --git a/leanring-buddy/CompanionPanelView.swift b/leanring-buddy/CompanionPanelView.swift index 76789b4c..83d72132 100644 --- a/leanring-buddy/CompanionPanelView.swift +++ b/leanring-buddy/CompanionPanelView.swift @@ -8,11 +8,15 @@ // import AVFoundation +import AppKit import SwiftUI struct CompanionPanelView: View { @ObservedObject var companionManager: CompanionManager @State private var emailInput: String = "" + @State private var isRecordingTextHotkey = false + @State private var textHotkeyValidationMessage: String? + @State private var textHotkeyRecorderMonitor: Any? var body: some View { VStack(alignment: .leading, spacing: 0) { @@ -31,6 +35,12 @@ struct CompanionPanelView: View { modelPickerRow .padding(.horizontal, 16) + + Spacer() + .frame(height: 10) + + textHotkeyRow + .padding(.horizontal, 16) } if !companionManager.allPermissionsGranted { @@ -79,6 +89,9 @@ struct CompanionPanelView: View { } .frame(width: 320) .background(panelBackground) + .onDisappear { + stopRecordingTextHotkey() + } } // MARK: - Header @@ -596,6 +609,108 @@ struct CompanionPanelView: View { .padding(.vertical, 4) } + // MARK: - Text Hotkey + + private var textHotkeyRow: some View { + VStack(alignment: .leading, spacing: 6) { + HStack { + Text("Text Hotkey") + .font(.system(size: 13, weight: .medium)) + .foregroundColor(DS.Colors.textSecondary) + + Spacer() + + Text(isRecordingTextHotkey ? "Press keys..." : companionManager.textInputKeyboardShortcut.displayText) + .font(.system(size: 11, weight: .semibold)) + .foregroundColor(isRecordingTextHotkey ? 
DS.Colors.blue400 : DS.Colors.textPrimary) + .padding(.horizontal, 8) + .padding(.vertical, 4) + .background( + RoundedRectangle(cornerRadius: 5, style: .continuous) + .fill(Color.white.opacity(0.08)) + ) + .overlay( + RoundedRectangle(cornerRadius: 5, style: .continuous) + .stroke(isRecordingTextHotkey ? DS.Colors.blue400.opacity(0.7) : DS.Colors.borderSubtle, lineWidth: 0.6) + ) + + Button(action: { + toggleTextHotkeyRecording() + }) { + Text(isRecordingTextHotkey ? "Cancel" : "Record") + .font(.system(size: 11, weight: .semibold)) + .foregroundColor(DS.Colors.textSecondary) + .padding(.horizontal, 9) + .padding(.vertical, 5) + .background( + Capsule() + .stroke(DS.Colors.borderSubtle, lineWidth: 0.8) + ) + } + .buttonStyle(.plain) + .pointerCursor() + } + + if let textHotkeyValidationMessage { + Text(textHotkeyValidationMessage) + .font(.system(size: 10)) + .foregroundColor(DS.Colors.warning) + } else { + Text("Open typed input near the cursor.") + .font(.system(size: 10)) + .foregroundColor(DS.Colors.textTertiary) + } + } + .padding(.vertical, 4) + } + + private func toggleTextHotkeyRecording() { + if isRecordingTextHotkey { + stopRecordingTextHotkey() + } else { + startRecordingTextHotkey() + } + } + + private func startRecordingTextHotkey() { + stopRecordingTextHotkey() + isRecordingTextHotkey = true + textHotkeyValidationMessage = nil + + textHotkeyRecorderMonitor = NSEvent.addLocalMonitorForEvents(matching: [.keyDown]) { event in + guard isRecordingTextHotkey else { return event } + + if event.keyCode == 53 { + stopRecordingTextHotkey() + return nil + } + + guard let shortcut = ClickyKeyboardShortcut.shortcut(from: event) else { + textHotkeyValidationMessage = "Choose a key with at least one modifier." 
+ return nil + } + + if let validationErrorMessage = shortcut.validationErrorMessage { + textHotkeyValidationMessage = validationErrorMessage + } else { + companionManager.setTextInputKeyboardShortcut(shortcut) + textHotkeyValidationMessage = nil + stopRecordingTextHotkey() + } + + return nil + } + } + + private func stopRecordingTextHotkey() { + isRecordingTextHotkey = false + + if let textHotkeyRecorderMonitor { + NSEvent.removeMonitor(textHotkeyRecorderMonitor) + self.textHotkeyRecorderMonitor = nil + } + } + // MARK: - Model Picker private var modelPickerRow: some View { diff --git a/leanring-buddy/GlobalTextInputShortcutMonitor.swift b/leanring-buddy/GlobalTextInputShortcutMonitor.swift new file mode 100644 index 00000000..e393483e --- /dev/null +++ b/leanring-buddy/GlobalTextInputShortcutMonitor.swift @@ -0,0 +1,134 @@ +// +// GlobalTextInputShortcutMonitor.swift +// leanring-buddy +// +// Captures the typed command popup shortcut while Clicky is backgrounded. +// + +import AppKit +import Combine +import CoreGraphics +import Foundation + +final class GlobalTextInputShortcutMonitor: ObservableObject { + let shortcutTriggeredPublisher = PassthroughSubject() + + var currentShortcut: ClickyKeyboardShortcut { + didSet { + isShortcutCurrentlyPressed = false + } + } + + private var globalEventTap: CFMachPort? + private var globalEventTapRunLoopSource: CFRunLoopSource? 
+ private var isShortcutCurrentlyPressed = false + + init(currentShortcut: ClickyKeyboardShortcut = .persistedTextInputShortcut()) { + self.currentShortcut = currentShortcut + } + + deinit { + stop() + } + + func start() { + guard globalEventTap == nil else { return } + + let monitoredEventTypes: [CGEventType] = [.flagsChanged, .keyDown, .keyUp] + let eventMask = monitoredEventTypes.reduce(CGEventMask(0)) { currentMask, eventType in + currentMask | (CGEventMask(1) << eventType.rawValue) + } + + let eventTapCallback: CGEventTapCallBack = { _, eventType, event, userInfo in + guard let userInfo else { + return Unmanaged.passUnretained(event) + } + + let globalTextInputShortcutMonitor = Unmanaged + .fromOpaque(userInfo) + .takeUnretainedValue() + + return globalTextInputShortcutMonitor.handleGlobalEventTap( + eventType: eventType, + event: event + ) + } + + guard let globalEventTap = CGEvent.tapCreate( + tap: .cgSessionEventTap, + place: .headInsertEventTap, + options: .listenOnly, + eventsOfInterest: eventMask, + callback: eventTapCallback, + userInfo: Unmanaged.passUnretained(self).toOpaque() + ) else { + print("⚠️ Global text input shortcut: couldn't create CGEvent tap") + return + } + + guard let globalEventTapRunLoopSource = CFMachPortCreateRunLoopSource( + kCFAllocatorDefault, + globalEventTap, + 0 + ) else { + CFMachPortInvalidate(globalEventTap) + print("⚠️ Global text input shortcut: couldn't create event tap run loop source") + return + } + + self.globalEventTap = globalEventTap + self.globalEventTapRunLoopSource = globalEventTapRunLoopSource + + CFRunLoopAddSource(CFRunLoopGetMain(), globalEventTapRunLoopSource, .commonModes) + CGEvent.tapEnable(tap: globalEventTap, enable: true) + } + + func stop() { + isShortcutCurrentlyPressed = false + + if let globalEventTapRunLoopSource { + CFRunLoopRemoveSource(CFRunLoopGetMain(), globalEventTapRunLoopSource, .commonModes) + self.globalEventTapRunLoopSource = nil + } + + if let globalEventTap { + 
CFMachPortInvalidate(globalEventTap)
+            self.globalEventTap = nil
+        }
+    }
+
+    /// Handles one event delivered by the global event tap.
+    ///
+    /// Sends a value on `shortcutTriggeredPublisher` when a key-down matches
+    /// `currentShortcut` and no press is already being tracked;
+    /// `isShortcutCurrentlyPressed` is cleared again when the shortcut's key is
+    /// released or its modifiers are no longer all held.
+    ///
+    /// - Parameters:
+    ///   - eventType: The type reported by the tap for this callback.
+    ///   - event: The tapped event; it is always handed back unmodified.
+    /// - Returns: The original event, unretained.
+    private func handleGlobalEventTap(
+        eventType: CGEventType,
+        event: CGEvent
+    ) -> Unmanaged<CGEvent>? {
+        let unmodifiedEvent = Unmanaged.passUnretained(event)
+
+        if eventType == .tapDisabledByTimeout || eventType == .tapDisabledByUserInput {
+            if let globalEventTap {
+                CGEvent.tapEnable(tap: globalEventTap, enable: true)
+            }
+            // Key-up / flags-changed events can be dropped while the tap is
+            // disabled. Forget any press that was in progress so the next real
+            // press of the shortcut is not swallowed by a stale flag.
+            isShortcutCurrentlyPressed = false
+            return unmodifiedEvent
+        }
+
+        // `truncatingIfNeeded` so an unexpected field value can never trap here.
+        let eventKeyCode = UInt16(truncatingIfNeeded: event.getIntegerValueField(.keyboardEventKeycode))
+        let eventModifierFlags = NSEvent.ModifierFlags(rawValue: UInt(event.flags.rawValue))
+            .intersection(.deviceIndependentFlagsMask)
+        let eventMatchesCurrentShortcut = currentShortcut.matches(
+            keyCode: eventKeyCode,
+            modifierFlags: eventModifierFlags
+        )
+
+        if eventType == .keyDown && eventMatchesCurrentShortcut && !isShortcutCurrentlyPressed {
+            isShortcutCurrentlyPressed = true
+            shortcutTriggeredPublisher.send(())
+        }
+
+        if eventType == .keyUp && eventKeyCode == currentShortcut.keyCode {
+            isShortcutCurrentlyPressed = false
+        }
+
+        if eventType == .flagsChanged && !eventModifierFlags.isSuperset(of: currentShortcut.modifierFlags) {
+            isShortcutCurrentlyPressed = false
+        }
+
+        return unmodifiedEvent
+    }
+}
diff --git a/leanring-buddy/TextCommandPopupManager.swift b/leanring-buddy/TextCommandPopupManager.swift
new file mode 100644
index 00000000..a972faa4
--- /dev/null
+++ b/leanring-buddy/TextCommandPopupManager.swift
@@ -0,0 +1,342 @@
+//
+//  TextCommandPopupManager.swift
+//  leanring-buddy
+//
+//  Manages the compact typed command popup opened by Clicky's text hotkey.
+//
+
+import AppKit
+import SwiftUI
+
+/// Virtual key code of the Escape key.
+private let escapeKeyCode: UInt16 = 53
+
+/// Owns the borderless floating panel that hosts the typed-command field and
+/// keeps it positioned next to the mouse cursor while it is visible.
+@MainActor
+final class TextCommandPopupManager: NSObject {
+    /// Panel that may become key (so its text field can take keyboard focus)
+    /// and reports Escape presses through `onEscapeKeyPressed`.
+    private final class TextCommandPanel: NSPanel {
+        var onEscapeKeyPressed: (() -> Void)?
+
+        override var canBecomeKey: Bool { true }
+        override var canBecomeMain: Bool { false }
+
+        override func keyDown(with event: NSEvent) {
+            guard event.keyCode == escapeKeyCode else {
+                super.keyDown(with: event)
+                return
+            }
+
+            onEscapeKeyPressed?()
+        }
+    }
+
+    private var panel: TextCommandPanel?
+    private var popupEventMonitors: [Any] = []
+    private var cursorTrackingTimer: Timer?
+
+    private let companionManager: CompanionManager
+    private let popupWidth: CGFloat = 360
+    private let popupHeight: CGFloat = 62
+    /// Minimum gap kept between the popup and the edges of the screen's visible frame.
+    private let screenEdgeInset: CGFloat = 10
+
+    init(companionManager: CompanionManager) {
+        self.companionManager = companionManager
+        super.init()
+        createPanelIfNeeded()
+    }
+
+    deinit {
+        // The repeating timer only references `self` weakly, so without this it
+        // would keep firing after the manager is gone. Cleaned up here the same
+        // way as the event monitors below.
+        cursorTrackingTimer?.invalidate()
+        for popupEventMonitor in popupEventMonitors {
+            NSEvent.removeMonitor(popupEventMonitor)
+        }
+    }
+
+    /// Rebuilds the popup content, places the panel near the cursor, makes it
+    /// key, and starts the dismissal monitors and cursor tracking.
+    func showPopup() {
+        createPanelIfNeeded()
+        refreshPanelContent()
+        positionPopupNearCursor()
+
+        panel?.makeKeyAndOrderFront(nil)
+        panel?.orderFrontRegardless()
+        installPopupEventMonitors()
+        startTrackingCursor()
+    }
+
+    /// Orders the panel out and tears down cursor tracking and event monitors.
+    func hidePopup() {
+        stopTrackingCursor()
+        panel?.orderOut(nil)
+        removePopupEventMonitors()
+    }
+
+    /// Creates and configures the panel once. Its content view is installed by
+    /// `refreshPanelContent()`, which `showPopup()` calls before every display.
+    private func createPanelIfNeeded() {
+        guard panel == nil else { return }
+
+        let textCommandPanel = TextCommandPanel(
+            contentRect: NSRect(x: 0, y: 0, width: popupWidth, height: popupHeight),
+            styleMask: [.borderless, .nonactivatingPanel],
+            backing: .buffered,
+            defer: false
+        )
+
+        textCommandPanel.onEscapeKeyPressed = { [weak self] in
+            self?.hidePopup()
+        }
+        textCommandPanel.isFloatingPanel = true
+        textCommandPanel.level = .floating
+        textCommandPanel.isOpaque = false
+        textCommandPanel.backgroundColor = .clear
+        textCommandPanel.hasShadow = false
+        textCommandPanel.hidesOnDeactivate = false
+        textCommandPanel.isExcludedFromWindowsMenu = true
+        textCommandPanel.collectionBehavior = [.canJoinAllSpaces, .fullScreenAuxiliary]
+        textCommandPanel.isMovableByWindowBackground = false
+        textCommandPanel.titleVisibility = .hidden
+        textCommandPanel.titlebarAppearsTransparent = true
+
+        panel = textCommandPanel
+    }
+
+    /// Replaces the panel's content with a newly created `TextCommandPopupView`,
+    /// so each display starts from fresh view state.
+    private func refreshPanelContent() {
+        let textCommandPopupView = TextCommandPopupView(companionManager: companionManager)
+            .frame(width: popupWidth)
+
+        let hostingView = NSHostingView(rootView: textCommandPopupView)
+        hostingView.frame = NSRect(x: 0, y: 0, width: popupWidth, height: popupHeight)
+        hostingView.wantsLayer = true
+        hostingView.layer?.backgroundColor = .clear
+
+        panel?.contentView = hostingView
+    }
+
+    /// Moves the panel just right of and below the cursor, clamped so it stays
+    /// inside the visible frame of the screen containing the cursor.
+    private func positionPopupNearCursor() {
+        guard let panel else { return }
+
+        let cursorLocation = NSEvent.mouseLocation
+        let screenContainingCursor = NSScreen.screens.first { screen in
+            screen.frame.contains(cursorLocation)
+        } ?? NSScreen.main
+
+        guard let targetScreenFrame = screenContainingCursor?.visibleFrame else { return }
+
+        let preferredOffset = CGPoint(x: 22, y: -18)
+        let preferredOrigin = CGPoint(
+            x: cursorLocation.x + preferredOffset.x,
+            y: cursorLocation.y - popupHeight + preferredOffset.y
+        )
+
+        let clampedOrigin = CGPoint(
+            x: min(
+                max(preferredOrigin.x, targetScreenFrame.minX + screenEdgeInset),
+                targetScreenFrame.maxX - popupWidth - screenEdgeInset
+            ),
+            y: min(
+                max(preferredOrigin.y, targetScreenFrame.minY + screenEdgeInset),
+                targetScreenFrame.maxY - popupHeight - screenEdgeInset
+            )
+        )
+
+        let popupFrame = NSRect(x: clampedOrigin.x, y: clampedOrigin.y, width: popupWidth, height: popupHeight)
+
+        // This runs on every tracking tick; skip the frame update (and forced
+        // redisplay) when the cursor has not moved the popup.
+        guard panel.frame != popupFrame else { return }
+        panel.setFrame(popupFrame, display: true)
+    }
+
+    /// Starts a repeating 30 Hz timer that re-positions the visible panel.
+    private func startTrackingCursor() {
+        stopTrackingCursor()
+
+        cursorTrackingTimer = Timer.scheduledTimer(withTimeInterval: 1.0 / 30.0, repeats: true) { [weak self] _ in
+            Task { @MainActor [weak self] in
+                guard let self, self.panel?.isVisible == true else { return }
+                self.positionPopupNearCursor()
+            }
+        }
+    }
+
+    private func stopTrackingCursor() {
+        cursorTrackingTimer?.invalidate()
+        cursorTrackingTimer = nil
+    }
+
+    /// Installs the monitors that dismiss the popup: Escape inside this app,
+    /// and mouse-downs outside the panel both in other apps (global monitor)
+    /// and in this app (local monitor).
+    private func installPopupEventMonitors() {
+        removePopupEventMonitors()
+
+        if let localEscapeKeyMonitor = NSEvent.addLocalMonitorForEvents(
+            matching: [.keyDown]
+        ) { [weak self] event in
+            if event.keyCode == escapeKeyCode {
+                self?.hidePopup()
+                // Returning nil consumes the Escape key press.
+                return nil
+            }
+
+            return event
+        } {
+            popupEventMonitors.append(localEscapeKeyMonitor)
+        }
+
+        if let globalClickOutsideMonitor = NSEvent.addGlobalMonitorForEvents(
+            matching: [.leftMouseDown, .rightMouseDown]
+        ) { [weak self] _ in
+            self?.hidePopupIfNeeded(forClickAt: NSEvent.mouseLocation)
+        } {
+            popupEventMonitors.append(globalClickOutsideMonitor)
+        }
+
+        if let localClickOutsideMonitor = NSEvent.addLocalMonitorForEvents(
+            matching: [.leftMouseDown, .rightMouseDown]
+        ) { [weak self] event in
+            self?.hidePopupIfNeeded(forClickAt: NSEvent.mouseLocation)
+            return event
+        } {
+            popupEventMonitors.append(localClickOutsideMonitor)
+        }
+    }
+
+    /// Hides the popup when `clickLocation` (screen coordinates) lies outside
+    /// the panel's frame.
+    private func hidePopupIfNeeded(forClickAt clickLocation: CGPoint) {
+        guard let panel else { return }
+        guard !panel.frame.contains(clickLocation) else { return }
+
+        // Deferred to the next main-queue turn, presumably so the click is fully
+        // delivered before the monitors are removed. Captured weakly so the
+        // pending block cannot extend the manager's lifetime.
+        DispatchQueue.main.async { [weak self] in
+            self?.hidePopup()
+        }
+    }
+
+    private func removePopupEventMonitors() {
+        for popupEventMonitor in popupEventMonitors {
+            NSEvent.removeMonitor(popupEventMonitor)
+        }
+        popupEventMonitors.removeAll()
+    }
+}
+
+/// SwiftUI content of the popup: a status icon followed by a single-line text field.
+private struct TextCommandPopupView: View {
+    @ObservedObject var companionManager: CompanionManager
+    @State private var typedCommandText = ""
+
+    private var isBusy: Bool {
+        companionManager.isTextCommandSubmissionBusy
+    }
+
+    private var trimmedTypedCommandText: String {
+        typedCommandText.trimmingCharacters(in: .whitespacesAndNewlines)
+    }
+
+    private var canSubmitTypedCommand: Bool {
+        !trimmedTypedCommandText.isEmpty && !isBusy
+    }
+
+    var body: some View {
+        HStack(spacing: 10) {
+            Image(systemName: isBusy ? "hourglass" : "bubble.left")
+                .font(.system(size: 15, weight: .semibold))
+                .foregroundColor(DS.Colors.blue300)
+                .frame(width: 18)
+
+            PopupTextField(
+                text: $typedCommandText,
+                placeholder: isBusy ? "Clicky is busy..." : "Ask Clicky...",
+                isEnabled: !isBusy,
+                onSubmit: {
+                    submitTypedCommandIfPossible()
+                }
+            )
+            .frame(height: 22)
+            .overlay(IBeamCursorView())
+        }
+        .padding(.horizontal, 16)
+        .padding(.vertical, 11)
+        .background(
+            RoundedRectangle(cornerRadius: 18, style: .continuous)
+                .fill(DS.Colors.background.opacity(0.94))
+                .shadow(color: Color.black.opacity(0.48), radius: 18, x: 0, y: 9)
+                .shadow(color: DS.Colors.blue500.opacity(0.10), radius: 12, x: 0, y: 0)
+        )
+        .overlay(
+            RoundedRectangle(cornerRadius: 18, style: .continuous)
+                .stroke(
+                    LinearGradient(
+                        colors: [
+                            Color.white.opacity(0.22),
+                            DS.Colors.blue400.opacity(0.18),
+                            Color.white.opacity(0.08)
+                        ],
+                        startPoint: .topLeading,
+                        endPoint: .bottomTrailing
+                    ),
+                    lineWidth: 0.9
+                )
+        )
+        .padding(5)
+        .onAppear {
+            typedCommandText = ""
+        }
+    }
+
+    /// Forwards the trimmed text to `companionManager.submitTypedCommand(_:)`
+    /// unless it is empty or `isTextCommandSubmissionBusy` is set.
+    private func submitTypedCommandIfPossible() {
+        guard canSubmitTypedCommand else { return }
+        companionManager.submitTypedCommand(trimmedTypedCommandText)
+    }
+}
+
+/// Borderless, single-line `NSTextField` wrapper that asks for first-responder
+/// status whenever it is enabled and calls `onSubmit` when its action fires.
+private struct PopupTextField: NSViewRepresentable {
+    @Binding var text: String
+    let placeholder: String
+    let isEnabled: Bool
+    let onSubmit: () -> Void
+
+    func makeNSView(context: Context) -> NSTextField {
+        let textField = NSTextField()
+        textField.delegate = context.coordinator
+        textField.target = context.coordinator
+        textField.action = #selector(Coordinator.submitTextField)
+        textField.isBezeled = false
+        textField.isBordered = false
+        textField.drawsBackground = false
+        textField.backgroundColor = .clear
+        textField.focusRingType = .none
+        textField.lineBreakMode = .byTruncatingTail
+        textField.usesSingleLineMode = true
+        textField.cell?.wraps = false
+        textField.cell?.isScrollable = true
+        textField.font = .systemFont(ofSize: 15, weight: .medium)
+        textField.textColor = NSColor(calibratedRed: 0.925, green: 0.933, blue: 0.929, alpha: 1.0)
+
+        // The field has no window yet at this point, so request focus on the
+        // next main-queue turn.
+        DispatchQueue.main.async {
+            textField.window?.makeFirstResponder(textField)
+        }
+
+        return textField
+    }
+
+    func updateNSView(_ textField: NSTextField, context: Context) {
+        // Keep the coordinator pointing at the current binding and callback.
+        context.coordinator.text = $text
+        context.coordinator.onSubmit = onSubmit
+
+        if textField.stringValue != text {
+            textField.stringValue = text
+        }
+
+        textField.placeholderAttributedString = NSAttributedString(
+            string: placeholder,
+            attributes: [
+                .foregroundColor: NSColor(calibratedRed: 0.671, green: 0.710, blue: 0.698, alpha: 1.0),
+                .font: NSFont.systemFont(ofSize: 15, weight: .medium)
+            ]
+        )
+        textField.isEnabled = isEnabled
+
+        // Re-request focus whenever the enabled field is not the one being edited.
+        if isEnabled && textField.window?.firstResponder !== textField.currentEditor() {
+            DispatchQueue.main.async {
+                textField.window?.makeFirstResponder(textField)
+            }
+        }
+    }
+
+    func makeCoordinator() -> Coordinator {
+        Coordinator(text: $text, onSubmit: onSubmit)
+    }
+
+    /// Bridges `NSTextField` delegate and action callbacks back to SwiftUI.
+    final class Coordinator: NSObject, NSTextFieldDelegate {
+        var text: Binding<String>
+        var onSubmit: () -> Void
+
+        init(text: Binding<String>, onSubmit: @escaping () -> Void) {
+            self.text = text
+            self.onSubmit = onSubmit
+        }
+
+        func controlTextDidChange(_ notification: Notification) {
+            guard let textField = notification.object as? NSTextField else { return }
+            text.wrappedValue = textField.stringValue
+        }
+
+        @objc func submitTextField() {
+            onSubmit()
+        }
+    }
+}
diff --git a/leanring-buddyTests/leanring_buddyTests.swift b/leanring-buddyTests/leanring_buddyTests.swift
index 188fe7ae..d488c25a 100644
--- a/leanring-buddyTests/leanring_buddyTests.swift
+++ b/leanring-buddyTests/leanring_buddyTests.swift
@@ -6,6 +6,7 @@
 //
 
 import Testing
+import AppKit
 @testable import leanring_buddy
 
 struct leanring_buddyTests {
@@ -37,4 +38,24 @@ struct leanring_buddyTests {
         #expect(shouldTreatPermissionAsGranted)
     }
 
+    @Test func defaultTextInputShortcutUsesOptionCommandK() async throws {
+        let shortcut = ClickyKeyboardShortcut.defaultTextInputShortcut
+
+        #expect(shortcut.keyCode == 40)
+        #expect(shortcut.modifierFlags.contains(.option))
+        #expect(shortcut.modifierFlags.contains(.command))
+        #expect(shortcut.displayText == "⌥⌘K")
+        #expect(shortcut.validationErrorMessage == nil)
+    }
+
+    @Test func textInputShortcutRejectsVoiceShortcutConflict() async throws {
+        let shortcut = ClickyKeyboardShortcut(
+            keyCode: 49,
+            modifierFlags: [.control, .option],
+            keyDisplayText: "Space"
+        )
+
+        #expect(shortcut.validationErrorMessage == "Control+Option is reserved for voice.")
+    }
+
 }