forked from ethanplusai/jarvis
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathserver.py
More file actions
2601 lines (2223 loc) · 116 KB
/
server.py
File metadata and controls
2601 lines (2223 loc) · 116 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
JARVIS Server — Voice AI + Development Orchestration
Handles:
1. WebSocket voice interface (browser audio <-> LLM <-> TTS)
2. Claude Code task manager (spawn/manage claude -p subprocesses)
3. Project awareness (scan Desktop for git repos)
4. REST API for task management
"""
import asyncio
import base64
import json
import logging
import os
import sys
import time
from pathlib import Path
# Load .env file if present
def load_env_file(env_path: Path) -> None:
    """Copy KEY=VALUE pairs from a dotenv-style file into ``os.environ``.

    Blank lines, ``#`` comment lines and lines without ``=`` are ignored.
    Surrounding whitespace and one layer of single/double quotes are removed
    from the value. Variables that already exist in the environment are left
    untouched (``setdefault``), so the real environment wins over the file.

    Args:
        env_path: Location of the file to read. A missing file is a no-op.
    """
    if not env_path.exists():
        return
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    for raw_line in env_path.read_text(encoding="utf-8").splitlines():
        entry = raw_line.strip()
        if not entry or entry.startswith("#") or "=" not in entry:
            continue
        key, _, value = entry.partition("=")
        key = key.strip()
        if not key:
            # A line such as "=value" has no variable name; setting an empty
            # name in os.environ raises, which would abort server start-up.
            continue
        os.environ.setdefault(key, value.strip().strip('"').strip("'"))


_env_path = Path(__file__).parent / ".env"
load_env_file(_env_path)
import uuid
from contextlib import asynccontextmanager
from dataclasses import dataclass, field, asdict
from datetime import datetime
from pathlib import Path
from typing import Optional
import anthropic
import httpx
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from actions import execute_action, monitor_build, open_terminal, open_browser, open_claude_in_project, _generate_project_name, prompt_existing_terminal
from work_mode import WorkSession, is_casual_question
from screen import get_active_windows, take_screenshot, describe_screen, format_windows_for_context
from calendar_access import get_todays_events, get_upcoming_events, get_next_event, format_events_for_context, format_schedule_summary, refresh_cache as refresh_calendar_cache
from mail_access import get_unread_count, get_unread_messages, get_recent_messages, search_mail, read_message, format_unread_summary, format_messages_for_context, format_messages_for_voice
from memory import (
remember, recall, get_open_tasks, create_task, complete_task, search_tasks,
create_note, search_notes, get_tasks_for_date, build_memory_context,
format_tasks_for_voice, extract_memories, get_important_memories,
)
from notes_access import get_recent_notes, read_note, search_notes_apple, create_apple_note
from dispatch_registry import DispatchRegistry
from planner import TaskPlanner, detect_planning_mode, BYPASS_PHRASES
# Root logging setup plus the module-wide logger used throughout this file.
logging.basicConfig(format="%(asctime)s [%(name)s] %(message)s", level=logging.INFO)
log = logging.getLogger("jarvis")
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
# API credentials; an unset variable yields an empty string.
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")
FISH_API_KEY = os.environ.get("FISH_API_KEY", "")
# Fish Audio voice to synthesize with; the default is the JARVIS (MCU) voice.
FISH_VOICE_ID = os.environ.get("FISH_VOICE_ID", "612b878b113047d9a770c069c8b4fdfe")
FISH_API_URL = "https://api.fish.audio/v1/tts"
# How the assistant refers to the user when no name is configured.
USER_NAME = os.environ.get("USER_NAME", "sir")
# Absolute directory containing this file.
PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
# Folder that is scanned for projects.
DESKTOP_PATH = Path.home().joinpath("Desktop")
# System prompt template for the main JARVIS conversation model.
#
# The text is a template: it contains the named placeholders {user_name},
# {current_time}, {weather_info}, {project_dir}, {screen_context},
# {calendar_context}, {mail_context}, {active_tasks}, {dispatch_context} and
# {known_projects}. The code that fills them in is not visible in this part of
# the file — presumably str.format(); if so, any literal brace added to the
# text must be doubled. TODO confirm against the call site.
#
# The trailing backslash after the opening quotes keeps the prompt from
# starting with an empty line.
JARVIS_SYSTEM_PROMPT = """\
You are JARVIS — Just A Rather Very Intelligent System. You serve as {user_name}'s AI assistant, modeled precisely after Tony Stark's AI from the MCU films.
VOICE & PERSONALITY:
- British butler elegance with understated dry wit
- Address {user_name} as "sir" naturally — not every sentence, but regularly
- Never say "How can I help you?" or "Is there anything else?" — just act
- Deliver bad news calmly, like reporting weather: "We have a slight problem, sir."
- Your humor is observational, never jokes: state facts and let implications land
- Economy of language — say more with less. No filler, no corporate-speak
- When things go wrong, get CALMER, not more alarmed
TIME & WEATHER AWARENESS:
- Current time: {current_time}
- Greet accordingly: "Good morning, sir" / "Good evening, sir"
- {weather_info}
CONVERSATION STYLE:
- "Will do, sir." — acknowledging tasks
- "For you, sir, always." — when asked for something significant
- "As always, sir, a great pleasure watching you work." — dry wit
- "I've taken the liberty of..." — proactive actions
- Lead status reports with data: numbers first, then context
- When you don't know something: "I'm afraid I don't have that information, sir" not "I don't know"
SELF-AWARENESS:
You ARE the JARVIS project at {project_dir} on {user_name}'s computer. Your code is Python (FastAPI server, WebSocket voice, Fish Audio TTS, Anthropic API). You were built by {user_name}. If asked about yourself, your code, how you work, or your line count — use [ACTION:PROMPT_PROJECT] to check the jarvis project. You have full access to your own source code.
YOUR CAPABILITIES (these are REAL and ACTIVE — you CAN do all of these RIGHT NOW):
- You CAN open Terminal.app via AppleScript
- You CAN open Google Chrome and browse any URL or search query
- You CAN spawn Claude Code in a Terminal window for coding tasks
- You CAN create project folders on the Desktop
- You CAN check Desktop projects and their git status
- You CAN plan complex tasks by asking smart questions before executing
- You CAN see what's on {user_name}'s screen — open windows, active apps, and screenshot vision
- You CAN read {user_name}'s calendar — today's events, upcoming meetings, schedule overview
- You CAN read {user_name}'s email (READ-ONLY) — unread count, recent messages, search by sender/subject. You CANNOT send, delete, or modify emails.
- You CAN read Apple Notes and create NEW notes — but you CANNOT edit or delete existing notes
- You CAN manage tasks — create, complete, and list to-do items with priorities and due dates
- You CAN help plan {user_name}'s day — combine calendar events, tasks, and priorities into an organized plan
- You CAN remember facts about {user_name} — preferences, decisions, goals. Use [ACTION:REMEMBER] to store important info.
DAY PLANNING:
When {user_name} asks to plan his day or schedule, DO NOT dispatch to a project. Instead:
1. Look at the calendar context and tasks already in your system prompt
2. Ask what his priorities are
3. Help organize by suggesting time blocks and task order
4. Use [ACTION:ADD_TASK] to create tasks he agrees to
5. Use [ACTION:ADD_NOTE] to save the plan as a note
Keep the planning conversational — don't try to do everything in one response.
BUILD PLANNING:
When {user_name} wants to BUILD something new:
- Do NOT immediately dispatch [ACTION:BUILD]. Ask 1-2 quick questions FIRST to nail down specifics.
- Good questions: "What should this look like?" / "Any specific features?" / "Which framework?"
- If he says "just build it" or "figure it out" — skip questions, use React + Tailwind as defaults.
- Once you have enough info, confirm the plan in ONE sentence and THEN dispatch [ACTION:BUILD] with a detailed description.
- The DISPATCHES section shows what you're currently building and what finished recently.
- When asked "where are we at" or "status" — check DISPATCHES, don't re-dispatch.
- NEVER hallucinate progress. If the build is still running, say "Still working on it, sir" — don't make up details about what's happening.
- NEVER guess localhost ports. Check the DISPATCHES section for the actual URL. If a dispatch says "Running at http://localhost:5174" — use THAT URL, not a guess.
- When asked to "pull it up" or "show me" — use [ACTION:BROWSE] with the URL from DISPATCHES. Do NOT dispatch to the project again just to find the URL.
IMPORTANT: Actions like opening Terminal, Chrome, or building projects are handled AUTOMATICALLY by your system — you do NOT need to describe doing them. If the user asks you to build something or search something, your system will handle the execution separately. In your response, just TALK — have a conversation. Don't say "I'll build that now" or "Claude Code is working on..." unless your system has actually triggered the action.
If the user asks you to do something you genuinely can't do, say "I'm afraid that's beyond my current reach, sir." Don't fake executing actions.
YOUR INTERFACE:
The user interacts with you through a web browser showing a particle orb visualization that reacts to your voice. The interface has these controls:
- **Three-dot menu** (top right): contains Settings, Restart Server, and Fix Yourself options
- **Settings panel**: Opens from the menu. Users can enter API keys (Anthropic, Fish Audio), test connections, set their name and preferences, and see system status (calendar, mail, notes connectivity). Keys are saved to the .env file.
- **Mute button**: Toggles your listening on/off. When muted, you can't hear the user. They click it again to unmute.
- **Restart Server**: Restarts your backend process. Useful if something seems stuck.
- **Fix Yourself**: Opens Claude Code in your own project directory so you can debug and fix issues in your own code.
- **The orb**: The glowing particle visualization in the center. It reacts to your voice when speaking, pulses when listening, and swirls when thinking.
If asked about any of these, explain them briefly and naturally. If the user is having trouble, suggest the relevant control: "Try the settings panel — the gear icon in the top right." or "The mute button may be active, sir."
SPEECH-TO-TEXT CORRECTIONS (the user speaks, speech recognition may mishear):
- "Cloud code" or "cloud" = "Claude Code" or "Claude"
- "Travis" = "JARVIS"
- "clock code" = "Claude Code"
RESPONSE LENGTH — THIS IS CRITICAL:
ONE sentence is ideal. TWO is the maximum for the spoken part. Never three.
No markdown, no bullet points, no code blocks in voice responses.
Action tags at the end do NOT count toward your sentence limit.
BANNED PHRASES — NEVER USE THESE:
- "Absolutely" / "Absolutely right"
- "Great question"
- "I'd be happy to"
- "Of course"
- "How can I help"
- "Is there anything else"
- "I apologize"
- "I should clarify"
- "I cannot" (for things listed in YOUR CAPABILITIES)
- "I don't have access to" (instead: "I'm afraid that's beyond my current reach, sir")
- "As an AI" (never break character)
- "Let me know if" / "Feel free to"
- Any sentence starting with "I"
INSTEAD SAY:
- "Will do, sir."
- "Right away, sir."
- "Understood."
- "Consider it done."
- "Done, sir."
- "Terminal is open."
- "Pulled that up in Chrome."
ACTION SYSTEM:
When you decide the user needs something DONE (not just discussed), include an action tag in your response:
- [ACTION:SCREEN] — capture and describe what's visible on the user's screen. Use when user says "look at my screen", "what's running", "what do you see", etc. Do NOT use PROMPT_PROJECT for screen requests.
- [ACTION:BUILD] description — when user wants a project built. Claude Code does the work.
- [ACTION:BROWSE] url or search query — when user wants to see a webpage or search result in Chrome
- [ACTION:RESEARCH] detailed research brief — when user wants real research with real data. Claude Code will browse the web, find real listings/data, and create a report document. Give it a detailed brief of what to find.
- [ACTION:OPEN_TERMINAL] — when user just wants a fresh Claude Code terminal with no specific project
CRITICAL: When the user asks about their SCREEN, what's RUNNING, or what they're LOOKING AT — ALWAYS use [ACTION:SCREEN] or let the fast action system handle it. NEVER use [ACTION:PROMPT_PROJECT] for screen requests. PROMPT_PROJECT is ONLY for working on code projects.
- [ACTION:PROMPT_PROJECT] project_name ||| prompt — THIS IS YOUR MOST POWERFUL ACTION. Use it whenever the user wants to work on, jump into, resume, check on, or interact with ANY existing project. You connect directly to Claude Code in that project and can read its response. Craft a clear prompt based on what the user wants. Examples:
"jump into client engine" → [ACTION:PROMPT_PROJECT] The Client Engine ||| What is the current state of this project? Summarize what was being worked on most recently.
"check for improvements on my-app" → [ACTION:PROMPT_PROJECT] my-app ||| Review the project and identify improvements we should make.
"resume where we left off on harvey" → [ACTION:PROMPT_PROJECT] harvey ||| Summarize what was being worked on most recently and what we should focus on next.
- [ACTION:ADD_TASK] priority ||| title ||| description ||| due_date — create a task. Priority: high/medium/low. Due date: YYYY-MM-DD or empty.
"remind me to call the client tomorrow" → [ACTION:ADD_TASK] medium ||| Call the client ||| Follow up on proposal ||| 2026-03-20
- [ACTION:ADD_NOTE] topic ||| content — save a note for future reference.
"note that the API key expires in April" → [ACTION:ADD_NOTE] general ||| API key expires in April, need to renew before then
- [ACTION:COMPLETE_TASK] task_id — mark a task as done.
- [ACTION:REMEMBER] content — store an important fact about the user for future context.
"I prefer React over Vue" → [ACTION:REMEMBER] User prefers React over Vue for frontend projects
- [ACTION:CREATE_NOTE] title ||| body — create a new Apple Note. For saving plans, ideas, lists.
"save that as a note" → [ACTION:CREATE_NOTE] Day Plan March 19 ||| Morning: client calls. Afternoon: TikTok dashboard. Evening: JARVIS improvements.
- [ACTION:READ_NOTE] title search — read an existing Apple Note by title keyword.
You use Claude Code as your tool to build, research, and write code — but YOU are the one doing the work. Never say "Claude Code did X" or "Claude Code is asking" — say "I built X", "I'm checking on that", "I found X". You ARE the intelligence. Claude Code is just your hands.
IMPORTANT: When the user says "jump into X", "work on X", "check on X", "resume X", "go back to X" — ALWAYS use [ACTION:PROMPT_PROJECT]. You have the ability to connect to any project and work on it directly. DO NOT say you can't see terminal history or don't have access — you DO.
Place the tag at the END of your spoken response. Example:
"Right away, sir — connecting to The Client Engine now. [ACTION:PROMPT_PROJECT] The Client Engine ||| Review the current state and what was being worked on. What should we focus on next?"
IMPORTANT:
- Do NOT use action tags for casual conversation
- Do NOT use action tags if the user is still explaining (ask questions first)
- Do NOT use [ACTION:BROWSE] just because someone mentions a URL in conversation
- When in doubt, just TALK — you can always act later
SCREEN AWARENESS:
{screen_context}
SCHEDULE:
{calendar_context}
EMAIL:
{mail_context}
ACTIVE TASKS:
{active_tasks}
DISPATCHES:
If the DISPATCHES section shows a recent completed result for a project, DO NOT dispatch again. Use the existing result. Only re-dispatch if the user explicitly asks for a FRESH review or NEW information.
{dispatch_context}
KNOWN PROJECTS:
{known_projects}
"""
# ---------------------------------------------------------------------------
# Weather (wttr.in)
# ---------------------------------------------------------------------------
# Session-wide weather cache: the text (None when nothing usable was fetched)
# and a flag recording that a fetch has already been attempted.
_cached_weather: Optional[str] = None
_weather_fetched: bool = False


async def fetch_weather() -> str:
    """Return a one-line weather summary from wttr.in.

    Only the first call performs a network request; the outcome of that single
    attempt (success or failure) is reused for every later call. When no
    summary is available the fixed text "Weather data unavailable." is
    returned.
    """
    global _cached_weather, _weather_fetched

    unavailable = "Weather data unavailable."
    if _weather_fetched:
        return _cached_weather if _cached_weather else unavailable

    # Mark the attempt up front so a failure is not retried on later calls.
    _weather_fetched = True
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            reply = await client.get(
                "https://wttr.in/?format=%l:+%C,+%t",
                headers={"User-Agent": "curl"},
            )
            if reply.status_code == 200:
                _cached_weather = reply.text.strip()
                return _cached_weather
    except Exception as e:
        log.warning(f"Weather fetch failed: {e}")
        _cached_weather = None
    return unavailable
# ---------------------------------------------------------------------------
# Data Models
# ---------------------------------------------------------------------------
@dataclass
class ClaudeTask:
    """Record describing one Claude task and its lifecycle state."""

    id: str
    prompt: str
    status: str = "pending"  # pending, running, completed, failed, cancelled
    working_dir: str = "."
    pid: Optional[int] = None
    result: str = ""
    error: str = ""
    started_at: Optional[datetime] = None
    completed_at: Optional[datetime] = None

    def to_dict(self) -> dict:
        """Return a plain dict of the task.

        The two timestamps are rendered as ISO-8601 strings (or None when
        unset) and an ``elapsed_seconds`` entry is added.
        """
        payload = asdict(self)
        for stamp_name in ("started_at", "completed_at"):
            stamp = getattr(self, stamp_name)
            payload[stamp_name] = stamp.isoformat() if stamp else None
        payload["elapsed_seconds"] = self.elapsed_seconds
        return payload

    @property
    def elapsed_seconds(self) -> float:
        """Seconds from start until completion, or until now if unfinished.

        A task that has not started reports 0.
        """
        if self.started_at is None:
            return 0
        finish = self.completed_at if self.completed_at is not None else datetime.now()
        return (finish - self.started_at).total_seconds()
class TaskRequest(BaseModel):
    """Pydantic model with the two inputs needed to start a task.

    The fields mirror the parameters of ``ClaudeTaskManager.spawn``.
    NOTE(review): presumably the JSON body of the task-creation REST endpoint;
    the endpoint is not visible in this part of the file — confirm.
    """

    # Prompt text for the task.
    prompt: str
    # Directory the task should run in; "." is the default.
    working_dir: str = "."
# ---------------------------------------------------------------------------
# Claude Task Manager
# ---------------------------------------------------------------------------
class ClaudeTaskManager:
    """Manages background claude -p runs and reports on them.

    Each task is launched in a visible Terminal.app window through AppleScript.
    Progress is observed by polling an output file in the task's working
    directory, and lifecycle events are pushed to registered WebSocket clients.
    """

    # Written to the output file by the Terminal command once claude has
    # exited; the poller treats its presence as "task finished".
    _DONE_MARKER = "--- JARVIS TASK COMPLETE ---"

    def __init__(self, max_concurrent: int = 3):
        self._tasks: dict[str, ClaudeTask] = {}
        self._max_concurrent = max_concurrent
        # NOTE: nothing in this class adds entries here (the work runs inside
        # Terminal.app), so cancel() normally finds no process to terminate.
        self._processes: dict[str, asyncio.subprocess.Process] = {}
        self._websockets: list[WebSocket] = []  # for push notifications
        # Strong references to fire-and-forget coroutines; the event loop only
        # keeps weak references, so an unreferenced task may be collected
        # before it finishes.
        self._background: set[asyncio.Task] = set()

    def register_websocket(self, ws: "WebSocket"):
        """Add a client to the notification list (no duplicates)."""
        if ws not in self._websockets:
            self._websockets.append(ws)

    def unregister_websocket(self, ws: "WebSocket"):
        """Remove a client from the notification list if present."""
        if ws in self._websockets:
            self._websockets.remove(ws)

    def _start_background(self, coro) -> "asyncio.Task":
        """Schedule a coroutine and hold a reference until it completes."""
        job = asyncio.create_task(coro)
        self._background.add(job)
        job.add_done_callback(self._background.discard)
        return job

    async def _notify(self, message: dict):
        """Push a message to all connected WebSocket clients.

        Clients whose send fails are dropped from the list.
        """
        dead = []
        # Iterate over a snapshot: clients can register or unregister while
        # this coroutine is suspended in send_json().
        for ws in list(self._websockets):
            try:
                await ws.send_json(message)
            except Exception:
                dead.append(ws)
        for ws in dead:
            # Tolerates a client that was already removed in the meantime.
            self.unregister_websocket(ws)

    async def spawn(self, prompt: str, working_dir: str = ".") -> str:
        """Spawn a claude -p run. Returns task_id. Non-blocking.

        Raises:
            RuntimeError: if the number of pending/running tasks has reached
                the configured maximum.
        """
        active = await self.get_active_count()
        if active >= self._max_concurrent:
            raise RuntimeError(
                f"Max concurrent tasks ({self._max_concurrent}) reached. "
                f"Wait for a task to complete or cancel one."
            )
        task_id = str(uuid.uuid4())[:8]
        task = ClaudeTask(
            id=task_id,
            prompt=prompt,
            working_dir=working_dir,
            status="pending",
        )
        self._tasks[task_id] = task
        # Fire and forget — the background coroutine updates the task
        self._start_background(self._run_task(task))
        log.info(f"Spawned task {task_id}: {prompt[:80]}...")
        await self._notify({
            "type": "task_spawned",
            "task_id": task_id,
            "prompt": prompt,
        })
        return task_id

    def _generate_project_name(self, prompt: str) -> str:
        """Generate a kebab-case project folder name from the prompt."""
        import re

        # Keep letters, digits and whitespace only, then split into words.
        words = re.sub(r'[^a-zA-Z0-9\s]', '', prompt.lower()).split()
        # Use at most the first four words that are not filler.
        skip = {"a", "the", "an", "me", "build", "create", "make", "for", "with", "and", "to", "of"}
        meaningful = [w for w in words if w not in skip][:4]
        return "-".join(meaningful) if meaningful else "jarvis-project"

    @staticmethod
    def _terminal_applescript(work_dir: str) -> str:
        """Build the AppleScript that runs claude in a new Terminal window.

        The shell command changes into ``work_dir`` (shell-quoted, so paths
        with spaces or quotes work), pipes the prompt file through claude while
        copying the output to ``.jarvis_output.txt``, and finally appends the
        completion marker to that same file so the poller can see it.
        """
        import shlex

        marker = ClaudeTaskManager._DONE_MARKER
        shell_cmd = (
            f"cd {shlex.quote(work_dir)} && "
            "cat .jarvis_prompt.md | claude -p --dangerously-skip-permissions "
            "| tee .jarvis_output.txt; "
            f"printf '\\n%s\\n' '{marker}' | tee -a .jarvis_output.txt"
        )
        # Escape for embedding inside an AppleScript double-quoted string.
        embedded = shell_cmd.replace("\\", "\\\\").replace('"', '\\"')
        return (
            'tell application "Terminal"\n'
            "activate\n"
            f'set newTab to do script "{embedded}"\n'
            "end tell\n"
        )

    async def _run_task(self, task: "ClaudeTask"):
        """Open a Terminal window and run claude code visibly.

        Polls the output file every 5 seconds for up to 10 minutes. The task
        ends as "completed", "timed_out", "failed" (unexpected error) or stays
        "cancelled" if cancel() was called meanwhile. A "task_complete"
        notification is always sent, and QA is started for completed tasks.
        """
        task.status = "running"
        task.started_at = datetime.now()
        prompt_file = None
        try:
            # Create project directory if it doesn't exist
            work_dir = task.working_dir
            if work_dir == "." or not work_dir:
                # Create a new project folder on Desktop
                project_name = self._generate_project_name(task.prompt)
                work_dir = str(Path.home() / "Desktop" / project_name)
                os.makedirs(work_dir, exist_ok=True)
                task.working_dir = work_dir

            # Write the prompt to a temp file so we can pipe it to claude
            prompt_file = Path(work_dir) / ".jarvis_prompt.md"
            prompt_file.write_text(task.prompt)

            # Remove output left by an earlier run in the same directory,
            # otherwise it would be mistaken for this run's result.
            output_file = Path(work_dir) / ".jarvis_output.txt"
            output_file.unlink(missing_ok=True)

            # Open Terminal.app with claude running in the project directory
            process = await asyncio.create_subprocess_exec(
                "osascript", "-e", self._terminal_applescript(work_dir),
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
            )
            await process.communicate()
            task.pid = process.pid  # pid of the osascript launcher

            # Monitor the output file for completion
            timeout = 600  # 10 minutes
            deadline = time.monotonic() + timeout
            while time.monotonic() < deadline:
                await asyncio.sleep(5)
                if task.status == "cancelled":
                    # cancel() already recorded the final state; stop polling.
                    break
                if output_file.exists():
                    content = output_file.read_text()
                    if self._DONE_MARKER in content:
                        task.result = content.replace(self._DONE_MARKER, "").strip()
                        task.status = "completed"
                        break
            else:
                task.status = "timed_out"
                task.error = f"Task timed out after {timeout}s"
        except Exception as exc:
            # Without this the task would stay "running" forever and keep
            # occupying one of the concurrency slots.
            log.error(f"Task {task.id} failed: {exc}")
            if task.status != "cancelled":
                task.status = "failed"
                task.error = str(exc)
        finally:
            # Clean up prompt file (best effort)
            if prompt_file is not None:
                try:
                    prompt_file.unlink(missing_ok=True)
                except OSError:
                    pass

        if task.status != "cancelled" or task.completed_at is None:
            task.completed_at = datetime.now()

        # Notify via WebSocket
        await self._notify({
            "type": "task_complete",
            "task_id": task.id,
            "status": task.status,
            "summary": task.result[:200] if task.result else task.error,
        })

        # Auto-QA on completed tasks
        if task.status == "completed":
            self._start_background(self._run_qa(task))

    async def _run_qa(self, task: "ClaudeTask", attempt: int = 1):
        """Run QA verification on a completed task, auto-retry on failure.

        Up to three attempts are made. Any error is logged and swallowed so QA
        problems never propagate into the task runner.

        NOTE(review): ``qa_agent``, ``success_tracker`` and ``suggest_followup``
        are not imported in the visible part of this file; they are expected to
        exist at module level — confirm.
        """
        try:
            qa_result = await qa_agent.verify(task.prompt, task.result, task.working_dir)
            duration = task.elapsed_seconds
            if qa_result.passed:
                log.info(f"Task {task.id} passed QA: {qa_result.summary}")
                success_tracker.log_task("dev", task.prompt, True, attempt - 1, duration)
                await self._notify({
                    "type": "qa_result",
                    "task_id": task.id,
                    "passed": True,
                    "summary": qa_result.summary,
                })
                # Proactive suggestion after successful task
                suggestion = suggest_followup(
                    task_type="dev",
                    task_description=task.prompt,
                    working_dir=task.working_dir,
                    qa_result=qa_result,
                )
                if suggestion:
                    success_tracker.log_suggestion(task.id, suggestion.text)
                    await self._notify({
                        "type": "suggestion",
                        "task_id": task.id,
                        "text": suggestion.text,
                        "action_type": suggestion.action_type,
                        "action_details": suggestion.action_details,
                    })
            else:
                log.warning(f"Task {task.id} failed QA: {qa_result.issues}")
                if attempt < 3:
                    log.info(f"Auto-retrying task {task.id} (attempt {attempt + 1}/3)")
                    retry_result = await qa_agent.auto_retry(
                        task.prompt, qa_result.issues, task.working_dir, attempt,
                    )
                    if retry_result["status"] == "completed":
                        task.result = retry_result["result"]
                        # Re-verify
                        await self._run_qa(task, attempt + 1)
                    else:
                        success_tracker.log_task("dev", task.prompt, False, attempt, duration)
                        await self._notify({
                            "type": "qa_result",
                            "task_id": task.id,
                            "passed": False,
                            "summary": f"Failed after {attempt + 1} attempts: {qa_result.issues}",
                        })
                else:
                    success_tracker.log_task("dev", task.prompt, False, attempt, duration)
                    await self._notify({
                        "type": "qa_result",
                        "task_id": task.id,
                        "passed": False,
                        "summary": f"Failed QA after {attempt} attempts: {qa_result.issues}",
                    })
        except Exception as e:
            log.error(f"QA error for task {task.id}: {e}")

    async def get_status(self, task_id: str) -> "Optional[ClaudeTask]":
        """Return the task with this id, or None if unknown."""
        return self._tasks.get(task_id)

    async def list_tasks(self) -> "list[ClaudeTask]":
        """Return all known tasks in insertion order."""
        return list(self._tasks.values())

    async def get_active_count(self) -> int:
        """Count tasks that are pending or running."""
        return sum(1 for t in self._tasks.values() if t.status in ("pending", "running"))

    async def cancel(self, task_id: str) -> bool:
        """Mark a pending/running task as cancelled.

        If a subprocess is registered for the task it is terminated (and killed
        after 5 seconds). Returns False when the task is unknown or already
        finished, True otherwise.
        """
        task = self._tasks.get(task_id)
        if not task or task.status not in ("pending", "running"):
            return False
        process = self._processes.get(task_id)
        if process:
            try:
                process.terminate()
                try:
                    await asyncio.wait_for(process.wait(), timeout=5.0)
                except asyncio.TimeoutError:
                    process.kill()
            except ProcessLookupError:
                pass
        task.status = "cancelled"
        task.completed_at = datetime.now()
        self._processes.pop(task_id, None)
        log.info(f"Cancelled task {task_id}")
        return True

    def get_active_tasks_summary(self) -> str:
        """Format active tasks for injection into the system prompt.

        Lists pending/running tasks plus tasks completed within the last
        300 seconds, one per line.
        """
        active = [t for t in self._tasks.values() if t.status in ("pending", "running")]
        completed_recent = [
            t for t in self._tasks.values()
            if t.status == "completed"
            and t.completed_at
            and (datetime.now() - t.completed_at).total_seconds() < 300
        ]
        if not active and not completed_recent:
            return "No active or recent tasks."
        lines = []
        for t in active:
            elapsed = f"{t.elapsed_seconds:.0f}s" if t.started_at else "queued"
            lines.append(f"- [{t.id}] RUNNING ({elapsed}): {t.prompt[:100]}")
        for t in completed_recent:
            lines.append(f"- [{t.id}] COMPLETED: {t.prompt[:60]} -> {t.result[:80]}")
        return "\n".join(lines)
# ---------------------------------------------------------------------------
# Project Scanner
# ---------------------------------------------------------------------------
async def scan_projects() -> list[dict]:
    """List git repositories that sit directly inside DESKTOP_PATH.

    Hidden entries and non-directories are skipped. Each result is a dict with
    ``name``, ``path`` and ``branch``; the branch is taken from ``.git/HEAD``
    and is "unknown" when HEAD cannot be read or is not a branch reference.
    A missing Desktop folder or a permission error yields whatever was
    collected so far.
    """
    found: list[dict] = []
    root = DESKTOP_PATH
    if not root.exists():
        return found

    branch_prefix = "ref: refs/heads/"
    try:
        for candidate in sorted(root.iterdir()):
            if not candidate.is_dir() or candidate.name.startswith("."):
                continue
            repo_meta = candidate / ".git"
            if not repo_meta.exists():
                continue

            # Best effort: any problem reading HEAD leaves the branch unknown.
            try:
                head_ref = (repo_meta / "HEAD").read_text().strip()
            except Exception:
                head_ref = ""

            branch_name = "unknown"
            if head_ref.startswith(branch_prefix):
                branch_name = head_ref.replace(branch_prefix, "")

            found.append({
                "name": candidate.name,
                "path": str(candidate),
                "branch": branch_name,
            })
    except PermissionError:
        pass
    return found
def format_projects_for_prompt(projects: list[dict]) -> str:
    """Render scanned projects as one "- name (branch) @ path" line each.

    Returns a fixed placeholder sentence when the list is empty.
    """
    if projects:
        return "\n".join(
            f"- {proj['name']} ({proj['branch']}) @ {proj['path']}"
            for proj in projects
        )
    return "No projects found on Desktop."
# ---------------------------------------------------------------------------
# Speech-to-Text Corrections
# ---------------------------------------------------------------------------
# Regex pattern -> replacement for phrases the speech recognizer tends to
# mishear. Entries are applied in this order, so the two-word "... code"
# patterns run before the single-word "cloud" / "quad" ones.
STT_CORRECTIONS = {
    r"\bcloud code\b": "Claude Code",
    r"\bclock code\b": "Claude Code",
    r"\bquad code\b": "Claude Code",
    r"\bclawed code\b": "Claude Code",
    r"\bclod code\b": "Claude Code",
    r"\bcloud\b": "Claude",
    r"\bquad\b": "Claude",
    r"\btravis\b": "JARVIS",
    r"\bjarves\b": "JARVIS",
}


def apply_speech_corrections(text: str) -> str:
    """Return ``text`` with every STT_CORRECTIONS pattern replaced.

    Matching is case-insensitive and limited to whole words.
    """
    import re as _stt_re

    corrected = text
    for misheard, intended in STT_CORRECTIONS.items():
        matcher = _stt_re.compile(misheard, _stt_re.IGNORECASE)
        corrected = matcher.sub(intended, corrected)
    return corrected
# ---------------------------------------------------------------------------
# LLM Intent Classifier (replaces keyword-based action detection)
# ---------------------------------------------------------------------------
# The only action labels classify_intent() may return.
_INTENT_ACTIONS = ("open_terminal", "browse", "build", "chat")


async def classify_intent(text: str, client: "anthropic.AsyncAnthropic") -> dict:
    """Classify every user message using Haiku LLM.

    Args:
        text: The (speech-corrected) user utterance.
        client: Async Anthropic client used for the classification call.

    Returns:
        {"action": "open_terminal|browse|build|chat", "target": "description"}.
        The action is always one of those four labels. Anything unexpected — an
        API error, a reply that is not a JSON object, an unknown label —
        degrades to "chat", and a missing/blank target falls back to ``text``.
    """
    try:
        response = await client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=100,
            system=(
                "Classify this voice command. The user is talking to JARVIS, an AI assistant that can:\n"
                "- Open Terminal and run Claude Code (coding AI tool)\n"
                "- Open Chrome browser for web searches and URLs\n"
                "- Build software projects via Claude Code in Terminal\n"
                "- Research topics by opening Chrome search\n\n"
                "Note: speech-to-text may produce errors like \"Cloud\" for \"Claude\", "
                "\"Travis\" for \"JARVIS\", \"clock code\" for \"Claude Code\".\n\n"
                "Return ONLY valid JSON: {\"action\": \"open_terminal|browse|build|chat\", "
                "\"target\": \"description of what to do\"}\n"
                "open_terminal = user wants to open terminal or launch Claude Code\n"
                "browse = user wants to search the web, look something up, visit a URL\n"
                "build = user wants to create/build a software project\n"
                "chat = just conversation, questions, or anything else\n"
                "If unclear, default to \"chat\"."
            ),
            messages=[{"role": "user", "content": text}],
        )
        raw = response.content[0].text.strip()

        # Keep only the outermost {...} span. This discards markdown code
        # fences as well as any stray prose around the JSON object.
        first, last = raw.find("{"), raw.rfind("}")
        if first != -1 and last > first:
            raw = raw[first:last + 1]

        data = json.loads(raw)
        if not isinstance(data, dict):
            raise ValueError(f"expected a JSON object, got {type(data).__name__}")

        action = str(data.get("action") or "chat").strip().lower()
        if action not in _INTENT_ACTIONS:
            # e.g. the model echoed the whole "a|b|c" template or invented a label
            action = "chat"

        target = data.get("target")
        if not isinstance(target, str) or not target.strip():
            target = text

        return {"action": action, "target": target}
    except Exception as e:
        log.warning(f"Intent classification failed: {e}")
        return {"action": "chat", "target": text}
# ---------------------------------------------------------------------------
# Markdown Stripping for TTS
# ---------------------------------------------------------------------------
# Phrases the assistant must never speak; removed from text before TTS.
_TTS_BANNED_PHRASES = (
    "my apologies", "i apologize", "absolutely", "great question",
    "i'd be happy to", "of course", "how can i help",
    "is there anything else", "i should clarify", "let me know if",
    "feel free to",
)


def strip_markdown_for_tts(text: str) -> str:
    """Strip ALL markdown from text before sending to TTS.

    Removes fenced code blocks, backticks, emphasis asterisks, heading
    markers, link targets and list markers, flattens newlines into a single
    line, and finally deletes the banned filler phrases.

    Banned phrases are matched case-insensitively as whole words, together
    with at most one directly following space, comma or dash. Whole-word
    matching keeps text such as "roof course" intact, and matching on the
    original string avoids relying on a lower-cased copy having the same
    length.

    Args:
        text: Raw model output, possibly containing markdown.

    Returns:
        Single-line plain text with leading/trailing whitespace, commas and
        dashes trimmed.
    """
    import re as _md_re

    result = text
    # Remove code blocks (``` ... ```)
    result = _md_re.sub(r"```[\s\S]*?```", "", result)
    # Remove inline code
    result = result.replace("`", "")
    # Remove bold/italic markers
    result = result.replace("**", "").replace("*", "")
    # Remove headers
    result = _md_re.sub(r"^#{1,6}\s*", "", result, flags=_md_re.MULTILINE)
    # Convert [text](url) to just text
    result = _md_re.sub(r"\[([^\]]+)\]\([^\)]+\)", r"\1", result)
    # Remove bullet points
    result = _md_re.sub(r"^\s*[-*+]\s+", "", result, flags=_md_re.MULTILINE)
    # Remove numbered lists
    result = _md_re.sub(r"^\s*\d+\.\s+", "", result, flags=_md_re.MULTILINE)
    # Double newlines to period
    result = _md_re.sub(r"\n{2,}", ". ", result)
    # Single newlines to space
    result = result.replace("\n", " ")
    # Clean up multiple spaces
    result = _md_re.sub(r"\s{2,}", " ", result)

    # Strip banned phrases (whole words only) plus one trailing separator
    banned_re = _md_re.compile(
        r"\b(?:" + "|".join(_md_re.escape(p) for p in _TTS_BANNED_PHRASES) + r")\b[ ,—-]?",
        _md_re.IGNORECASE,
    )
    result = banned_re.sub("", result)
    # Removing a phrase from the middle of a sentence can leave two spaces
    result = _md_re.sub(r"\s{2,}", " ", result)

    return result.strip().strip(",").strip("—").strip("-").strip()
# ---------------------------------------------------------------------------
# Action Tag Extraction (parse [ACTION:X] from LLM responses)
# ---------------------------------------------------------------------------
import re as _action_re
def extract_action(response: str) -> tuple[str, dict | None]:
"""Extract [ACTION:X] tag from LLM response.
Returns (clean_text_for_tts, action_dict_or_none).
"""
match = _action_re.search(
r'\[ACTION:(BUILD|BROWSE|RESEARCH|OPEN_TERMINAL|PROMPT_PROJECT|ADD_TASK|ADD_NOTE|COMPLETE_TASK|REMEMBER|CREATE_NOTE|READ_NOTE|SCREEN)\]\s*(.*?)$',
response, _action_re.DOTALL,
)
if match:
action_type = match.group(1).lower()
action_target = match.group(2).strip()
clean_text = response[:match.start()].strip()
return clean_text, {"action": action_type, "target": action_target}
return response, None
async def _execute_build(target: str):
    """Run the build requested by an LLM-embedded [ACTION:BUILD] tag.

    Delegates to ``handle_build``; any exception it raises is logged at error
    level and not propagated to the caller.
    """
    try:
        build_coro = handle_build(target)
        await build_coro
    except Exception as exc:
        log.error(f"Build execution failed: {exc}")
async def _execute_browse(target: str):
    """Execute a browse action from an LLM-embedded [ACTION:BROWSE] tag.

    The target is opened directly when it is a URL: either it starts with an
    explicit ``http://`` / ``https://`` scheme, or it is a single token with
    an interior dot (e.g. ``github.com``). Anything else is treated as a
    search query and opened as a Google search.

    An empty or whitespace-only target is ignored with a warning. Any failure
    while opening the browser is logged and not propagated.
    """
    try:
        query = (target or "").strip()
        if not query:
            # Previously this surfaced as an IndexError from split()[0].
            log.warning("Browse action ignored: empty target")
            return

        has_scheme = query.lower().startswith(("http://", "https://"))
        # A bare domain is a single token with a dot that is not merely
        # trailing punctuation. Multi-word input such as "node.js tutorial"
        # is a search query, not a URL.
        is_bare_domain = len(query.split()) == 1 and "." in query.strip(".")

        if has_scheme or is_bare_domain:
            await open_browser(query)
        else:
            from urllib.parse import quote
            await open_browser(f"https://www.google.com/search?q={quote(query)}")
    except Exception as e:
        log.error(f"Browse execution failed: {e}")
async def _execute_research(target: str, ws=None):
    """Execute research via claude -p in background. Opens report and speaks when done.

    Creates a Desktop folder named by ``_generate_project_name(target)``, runs
    ``claude -p`` there with a prompt asking for a ``report.html``, records the
    folder in ``recently_built``, opens the report (or the first ``*.html``
    file found) in the browser, and, if ``ws`` is given, sends a spoken
    completion notice over it.

    Args:
        target: The research request, passed verbatim as the start of the prompt.
        ws: Optional WebSocket-like object with ``send_json``; used only for
            best-effort voice notifications.

    The subprocess is limited to 300 seconds. On timeout the child process is
    killed and reaped before the timeout is reported. No exception escapes
    this function; failures are logged.
    """
    try:
        name = _generate_project_name(target)
        path = str(Path.home() / "Desktop" / name)
        os.makedirs(path, exist_ok=True)
        prompt = (
            f"{target}\n\n"
            f"Research this thoroughly. Find REAL data — not made-up examples.\n"
            f"Create a well-designed HTML file called `report.html` in the current directory.\n"
            f"Dark theme, clean typography, organized sections, real links and sources.\n"
            f"The working directory is: {path}"
        )
        log.info(f"Research started via claude -p in {path}")
        process = await asyncio.create_subprocess_exec(
            "claude", "-p", "--output-format", "text", "--dangerously-skip-permissions",
            stdin=asyncio.subprocess.PIPE,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=path,
        )
        try:
            stdout, stderr = await asyncio.wait_for(
                process.communicate(input=prompt.encode()),
                timeout=300,
            )
        except asyncio.TimeoutError:
            # wait_for() only cancels communicate(); the child itself would
            # keep running (and writing into the project folder) unless it is
            # killed explicitly. Reap it so no zombie is left behind.
            try:
                process.kill()
            except ProcessLookupError:
                pass  # already exited between the timeout and the kill
            await process.wait()
            raise

        if process.returncode:
            # Surface CLI failures instead of silently continuing.
            err_excerpt = stderr.decode(errors="replace").strip()[:200]
            log.warning(f"claude exited with code {process.returncode}: {err_excerpt}")

        # errors="replace" so stray non-UTF-8 bytes cannot abort the whole run.
        result = stdout.decode(errors="replace").strip()
        log.info(f"Research complete ({len(result)} chars)")
        recently_built.append({"name": name, "path": path, "time": time.time()})

        # Find and open any HTML report
        report = Path(path) / "report.html"
        if not report.exists():
            # Check for any HTML file
            html_files = list(Path(path).glob("*.html"))
            if html_files:
                report = html_files[0]
        if report.exists():
            await open_browser(f"file://{report}")
            log.info(f"Opened {report.name} in browser")

        # Notify via voice if WebSocket still connected
        if ws:
            try:
                notify_text = "Research is complete, sir. Report is open in your browser."
                audio = await synthesize_speech(notify_text)
                if audio:
                    await ws.send_json({"type": "status", "state": "speaking"})
                    await ws.send_json({"type": "audio", "data": base64.b64encode(audio).decode(), "text": notify_text})
                    await ws.send_json({"type": "status", "state": "idle"})
                    log.info(f"JARVIS: {notify_text}")
            except Exception:
                pass  # WebSocket might be gone
    except asyncio.TimeoutError:
        log.error("Research timed out after 5 minutes")
        if ws:
            try:
                audio = await synthesize_speech("Research timed out, sir. It was taking too long.")
                if audio:
                    await ws.send_json({"type": "audio", "data": base64.b64encode(audio).decode(), "text": "Research timed out, sir."})
            except Exception:
                pass  # best-effort notification only
    except Exception as e:
        log.error(f"Research execution failed: {e}")
async def _focus_terminal_window(project_name: str):
    """Bring a Terminal window matching the project name to front.

    Runs an AppleScript via ``osascript`` that walks Terminal's windows and
    raises the first one whose name contains ``project_name``. This is a
    best-effort convenience: failures (including ``osascript`` being
    unavailable) are logged at debug level and never raised.

    Args:
        project_name: Text to look for in the Terminal window name.
    """
    # Escape backslashes BEFORE quotes. Otherwise a name containing \" would
    # turn into \\" and terminate the AppleScript string literal early,
    # letting the rest of the name be interpreted as script.
    escaped = project_name.replace("\\", "\\\\").replace('"', '\\"')
    script = f'''
tell application "Terminal"
    repeat with w in windows
        if name of w contains "{escaped}" then
            set index of w to 1
            activate
            exit repeat
        end if
    end repeat
end tell
'''
    try:
        proc = await asyncio.create_subprocess_exec(
            "osascript", "-e", script,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            await asyncio.wait_for(proc.communicate(), timeout=5)
        except asyncio.TimeoutError:
            # wait_for() cancels communicate() but leaves osascript running;
            # kill and reap it so a hung script does not linger.
            proc.kill()
            await proc.wait()
    except Exception as e:
        # Deliberately non-fatal: focusing the window is cosmetic.
        log.debug(f"Could not focus Terminal window for {project_name}: {e}")
async def _execute_open_terminal():
    """Run the action requested by an LLM-embedded [ACTION:OPEN_TERMINAL] tag.

    Delegates to ``handle_open_terminal``; any exception it raises is logged
    at error level and not propagated to the caller.
    """
    try:
        open_coro = handle_open_terminal()
        await open_coro
    except Exception as exc:
        log.error(f"Open terminal failed: {exc}")
def _find_project_dir(project_name: str) -> str | None:
"""Find a project directory by name from cached projects or Desktop."""
for p in cached_projects:
if project_name.lower() in p.get("name", "").lower():
return p.get("path")
desktop = Path.home() / "Desktop"
for d in desktop.iterdir():
if d.is_dir() and project_name.lower() in d.name.lower():
return str(d)
return None
async def _execute_prompt_project(project_name: str, prompt: str, work_session: WorkSession, ws, dispatch_id: int = None, history: list[dict] = None, voice_state: dict = None):
"""Dispatch a prompt to Claude Code in a project directory.
Runs entirely in the background. JARVIS returns to conversation mode
immediately. When Claude Code finishes, JARVIS interrupts to report.
"""
try:
project_dir = _find_project_dir(project_name)
# Register dispatch if not already registered
if dispatch_id is None:
dispatch_id = dispatch_registry.register(project_name, project_dir or "", prompt)
if not project_dir:
msg = f"Couldn't find the {project_name} project directory, sir."
audio = await synthesize_speech(msg)
if audio and ws:
try:
await ws.send_json({"type": "status", "state": "speaking"})
await ws.send_json({"type": "audio", "data": base64.b64encode(audio).decode(), "text": msg})
except Exception:
pass
return
# Use a SEPARATE session so we don't trap the main conversation
dispatch = WorkSession()
await dispatch.start(project_dir, project_name)
# Bring matching Terminal window to front so user can watch
asyncio.create_task(_focus_terminal_window(project_name))
log.info(f"Dispatching to {project_name} in {project_dir}: {prompt[:80]}")
dispatch_registry.update_status(dispatch_id, "building")
# Run claude -p in background
full_response = await dispatch.send(prompt)
await dispatch.stop()
# Auto-open any localhost URLs from response
import re as _re
# Check for the explicit RUNNING_AT marker first
running_match = _re.search(r'RUNNING_AT=(https?://localhost:\d+)', full_response or "")
if not running_match:
running_match = _re.search(r'https?://localhost:\d+', full_response or "")
if running_match:
url = running_match.group(1) if running_match.lastindex else running_match.group(0)
asyncio.create_task(_execute_browse(url))
log.info(f"Auto-opening {url}")
# Store URL in dispatch
if dispatch_id:
dispatch_registry.update_status(dispatch_id, "completed",
response=full_response[:2000], summary=f"Running at {url}")
if not full_response or full_response.startswith("Hit a problem") or full_response.startswith("That's taking"):
dispatch_registry.update_status(dispatch_id, "failed" if full_response else "timeout", response=full_response or "")
msg = f"Sir, I ran into an issue with {project_name}. {full_response[:150] if full_response else 'No response received.'}"
else:
# Summarize via Haiku — don't read word for word
if anthropic_client:
try:
summary = await anthropic_client.messages.create(
model="claude-haiku-4-5-20251001",
max_tokens=150,
system=(
"You are JARVIS reporting back on what you found or built in a project. "
"Speak in first person — 'I found', 'I built', 'I reviewed'. "
"Start with 'Sir, ' to get the user's attention. "
"Be specific but concise — highlight the key findings or actions taken. "
"If there are multiple items, give the count and top 2-3 briefly. "
"End by asking how the user wants to proceed. "
"NEVER read out URLs or localhost addresses. NEVER say 'Claude Code'. "
"2-3 sentences max. No markdown. Natural spoken voice."
),
messages=[{"role": "user", "content": f"Project: {project_name}\nClaude Code reported:\n{full_response[:3000]}"}],
)
msg = summary.content[0].text
except Exception:
msg = f"Sir, {project_name} finished. Here's the gist: {full_response[:200]}"
else:
msg = f"Sir, {project_name} is done. {full_response[:200]}"
# Speak the result — skip if user has spoken recently to avoid audio collision
log.info(f"Dispatch summary for {project_name}: {msg[:100]}")
if voice_state and time.time() - voice_state["last_user_time"] < 3:
log.info(f"Skipping dispatch audio for {project_name} — user spoke recently")
# Result is still stored in history below so JARVIS can reference it
else:
audio = await synthesize_speech(strip_markdown_for_tts(msg))
if ws:
try:
await ws.send_json({"type": "status", "state": "speaking"})
if audio:
await ws.send_json({"type": "audio", "data": base64.b64encode(audio).decode(), "text": msg})
log.info(f"Dispatch audio sent for {project_name}")
else:
await ws.send_json({"type": "text", "text": msg})
log.info(f"Dispatch text fallback sent for {project_name}")
except Exception as e:
log.error(f"Dispatch audio send failed: {e}")
# Store dispatch result in conversation history so JARVIS remembers it
if history is not None:
history.append({"role": "assistant", "content": f"[Dispatch result for {project_name}]: {msg}"})
dispatch_registry.update_status(dispatch_id, "completed", response=full_response[:2000], summary=msg[:200])
log.info(f"Project {project_name} dispatch complete ({len(full_response)} chars)")
except Exception as e:
log.error(f"Prompt project failed: {e}", exc_info=True)
try:
msg = f"Had trouble connecting to {project_name}, sir."