abetlen · amandwivedi45 · Jul 22, 2025
diff --git a/examples/high_level_api/fastapi_server.py b/examples/high_level_api/fastapi_server.py
@@ -24,15 +24,24 @@
 To actually see the implementation of the server, see llama_cpp/server/app.py
 
 """
-
 import os
 import uvicorn
-
 from llama_cpp.server.app import create_app
+import asyncio
 
 if __name__ == "__main__":
     app = create_app()
 
-    uvicorn.run(
-        app, host=os.getenv("HOST", "localhost"), port=int(os.getenv("PORT", 8000))
-    )
+    # uvicorn.run() blocks until the server stops. It is deliberately not
+    # wrapped in try/except: swallowing exceptions here would hide the
+    # traceback and let the process exit with status 0 on failure.
+    uvicorn.run(
+        app,
+        host=os.getenv("HOST", "localhost"),
+        port=int(os.getenv("PORT", 8000)),
+        # Close idle keep-alive connections after 10 seconds.
+        timeout_keep_alive=10,
+        # Give in-flight requests up to 5 seconds to finish on shutdown.
+        # (`timeout_notify` is not a uvicorn.run() keyword argument.)
+        timeout_graceful_shutdown=5,
+    )