@@ -45,14 +45,30 @@ def print_endpoint(
4545
def print_api_error(
    e: InvalidRequestError,
    endpoint_id: str | None = None,
) -> None:
    """Print a user-facing explanation of an API error to stderr.

    The message carried by ``e.api_response.message`` is matched
    case-insensitively against a few keywords to choose a friendlier
    explanation. The checks run in order (authentication, endpoint not
    found, permission), so a message matching several keywords gets the
    first applicable explanation. Messages matching none of them are
    echoed as received.

    Args:
        e: The API error whose ``api_response.message`` is inspected.
        endpoint_id: Optional endpoint ID; when provided it is quoted in
            the "not found" message.
    """
    error_details = e.api_response.message
    # An empty/missing message becomes "" so the keyword checks below are
    # simply False instead of raising on ``None``.
    error_lower = error_details.lower() if error_details else ""

    if "credentials" in error_lower or "authentication" in error_lower:
        click.echo("Error: Invalid API key or authentication failed", err=True)
    elif "not found" in error_lower and "endpoint" in error_lower:
        # Build the subject as one unit so that a missing ID does not leave
        # a doubled space ("Endpoint  not found.") and the quoted ID has no
        # padding inside the quotes.
        subject = f"Endpoint '{endpoint_id}'" if endpoint_id else "Endpoint"
        click.echo(f"Error: {subject} not found.", err=True)
        click.echo(
            "The endpoint may have been deleted or the ID may be incorrect.",
            err=True,
        )
        click.echo(
            "Use 'together endpoints list --mine true' to see your endpoints.",
            err=True,
        )
    elif (
        "permission" in error_lower
        or "forbidden" in error_lower
        or "unauthorized" in error_lower
    ):
        click.echo(
            "Error: You don't have permission to access this endpoint.", err=True
        )
        click.echo(
            "This endpoint may belong to another user or organization.",
            err=True,
        )
    else:
        click.echo(f"Error: {error_details} ", err=True)
5874
@@ -65,7 +81,9 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
6581 try :
6682 return f (* args , ** kwargs )
6783 except InvalidRequestError as e :
68- print_api_error (e )
84+ # Try to extract endpoint_id from kwargs for better error messages
85+ endpoint_id = kwargs .get ("endpoint_id" )
86+ print_api_error (e , endpoint_id = endpoint_id )
6987 sys .exit (1 )
7088 except Exception as e :
7189 click .echo (f"Error: An unexpected error occurred - { str (e )} " , err = True )
@@ -81,6 +99,92 @@ def endpoints(ctx: click.Context) -> None:
8199 pass
82100
83101
def _print_hardware_error(
    client: Together,
    model: str,
    hardware_id: str,
    gpu: str,
    gpu_count: int,
    *,
    speculative_decoding_enabled: bool = False,
) -> None:
    """Print a detailed error message when hardware selection fails.

    Always prints a headline naming the requested configuration, then
    looks up the hardware options for ``model`` via
    ``client.endpoints.list_hardware`` to explain the failure. If that
    lookup raises, only a generic hint is printed.

    Args:
        client: Client used to list the hardware options for ``model``.
        model: Model name the endpoint was requested for.
        hardware_id: Full hardware ID that was requested; compared with
            the ``id`` of each listed option.
        gpu: Short GPU type name; shown upper-cased in messages.
        gpu_count: Number of GPUs requested.
        speculative_decoding_enabled: Whether speculative decoding was on
            for the failed request; selects which toggle is suggested when
            the requested hardware reports itself as available.
    """
    # Shared label for the requested configuration, reused in every message.
    config_label = f"{gpu_count} x {gpu.upper()}"

    click.echo(
        f"Error: Cannot create endpoint with {config_label} for model '{model}'",
        err=True,
    )

    # Fetch hardware options for this model to provide specific guidance.
    try:
        hardware_options = client.endpoints.list_hardware(model)
    except Exception:
        # Best effort: if the lookup itself fails, fall back to a generic hint.
        click.echo(
            "\n Use 'together endpoints hardware --model <model>' to see available options.",
            err=True,
        )
        return

    # Check whether the requested hardware exists for this model.
    requested_hw = next((hw for hw in hardware_options if hw.id == hardware_id), None)

    if requested_hw is None:
        # The requested hardware ID is not among this model's options.
        click.echo(
            f"\n The hardware configuration '{hardware_id}' is not compatible with this model.",
            err=True,
        )
    elif requested_hw.availability:
        status = requested_hw.availability.status
        if status == "unavailable":
            click.echo(
                f"\n The {config_label} configuration is currently unavailable. "
                "This hardware type has no available capacity at this time.",
                err=True,
            )
        elif status == "insufficient":
            click.echo(
                f"\n The {config_label} configuration has insufficient capacity. "
                "Not enough GPUs available for the requested number of replicas.",
                err=True,
            )
        elif status == "available":
            # Hardware is available but the request still failed: suggest
            # flipping the speculative-decoding setting, then stop without
            # listing alternatives.
            if speculative_decoding_enabled:
                click.echo(
                    "\n Hardware is available but this configuration is not supported. "
                    "Try adding --no-speculative-decoding.",
                    err=True,
                )
            else:
                click.echo(
                    "\n Hardware is available but this configuration is not supported. "
                    "Try removing --no-speculative-decoding to enable speculative decoding.",
                    err=True,
                )
            return

    # Show available alternatives.
    available_options = [
        hw
        for hw in hardware_options
        if hw.availability is not None and hw.availability.status == "available"
    ]

    if available_options:
        click.echo("\n Available hardware options for this model:", err=True)
        click.echo("", err=True)
        # NOTE(review): _format_hardware_options is defined elsewhere in this
        # file; confirm which stream it writes to.
        _format_hardware_options(available_options, show_availability=True)
    else:
        click.echo(
            "\n No hardware is currently available for this model. Please try again later.",
            err=True,
        )
        click.echo("\n All hardware options for this model:", err=True)
        click.echo("", err=True)
        _format_hardware_options(hardware_options, show_availability=True)
186+
187+
84188@endpoints .command ()
85189@click .option (
86190 "--model" ,
@@ -162,6 +266,51 @@ def create(
162266 wait : bool ,
163267) -> None :
164268 """Create a new dedicated inference endpoint."""
269+ # Client-side validation for replicas
270+ if min_replicas < 0 :
271+ click .echo (
272+ f"Error: --min-replicas must be non-negative, got { min_replicas } " , err = True
273+ )
274+ sys .exit (1 )
275+ if max_replicas < 0 :
276+ click .echo (
277+ f"Error: --max-replicas must be non-negative, got { max_replicas } " , err = True
278+ )
279+ sys .exit (1 )
280+ if min_replicas > max_replicas :
281+ click .echo (
282+ f"Error: --min-replicas ({ min_replicas } ) cannot be greater than "
283+ f"--max-replicas ({ max_replicas } )" ,
284+ err = True ,
285+ )
286+ sys .exit (1 )
287+
288+ # Validate GPU count
289+ valid_gpu_counts = [1 , 2 , 4 , 8 ]
290+ if gpu_count not in valid_gpu_counts :
291+ click .echo (
292+ f"Error: --gpu-count must be one of { valid_gpu_counts } , got { gpu_count } " ,
293+ err = True ,
294+ )
295+ sys .exit (1 )
296+
297+ # Validate availability zone if specified
298+ if availability_zone :
299+ try :
300+ valid_zones = client .endpoints .list_avzones ()
301+ if availability_zone not in valid_zones :
302+ click .echo (
303+ f"Error: Invalid availability zone '{ availability_zone } '" , err = True
304+ )
305+ if valid_zones :
306+ click .echo ("Available zones:" , err = True )
307+ for zone in sorted (valid_zones ):
308+ click .echo (f" { zone } " , err = True )
309+ sys .exit (1 )
310+ except Exception :
311+ # If we can't fetch zones, let the API validate it
312+ pass
313+
165314 # Map GPU types to their full hardware ID names
166315 gpu_map = {
167316 "b200" : "nvidia_b200_180gb_sxm" ,
@@ -189,16 +338,55 @@ def create(
189338 availability_zone = availability_zone ,
190339 )
191340 except InvalidRequestError as e :
341+ error_msg = str (e .args [0 ]).lower () if e .args else ""
192342 if (
193- "check the hardware api" in str (e .args [0 ]).lower ()
194- or "invalid hardware provided" in str (e .args [0 ]).lower ()
195- or "the selected configuration" in str (e .args [0 ]).lower ()
343+ "check the hardware api" in error_msg
344+ or "invalid hardware provided" in error_msg
345+ or "the selected configuration" in error_msg
346+ ):
347+ # speculative decoding is enabled when --no-speculative-decoding is NOT passed
348+ speculative_decoding_enabled = not no_speculative_decoding
349+ _print_hardware_error (
350+ client ,
351+ model ,
352+ hardware_id ,
353+ gpu ,
354+ gpu_count ,
355+ speculative_decoding_enabled = speculative_decoding_enabled ,
356+ )
357+ elif "model" in error_msg and (
358+ "not found" in error_msg
359+ or "invalid" in error_msg
360+ or "does not exist" in error_msg
361+ or "not supported" in error_msg
196362 ):
197- click .secho ("Invalid hardware selected." , fg = "red" , err = True )
198- click .echo ("\n Available hardware options:" )
199- fetch_and_print_hardware_options (
200- client = client , model = model , print_json = False , available = True
363+ click .echo (
364+ f"Error: Model '{ model } ' was not found or is not available for "
365+ "dedicated endpoints." ,
366+ err = True ,
367+ )
368+ click .echo (
369+ "Please check that the model name is correct and that it supports "
370+ "dedicated endpoint deployment." ,
371+ err = True ,
372+ )
373+ click .echo (
374+ "You can browse available models at: https://api.together.ai/models" ,
375+ err = True ,
376+ )
377+ elif "availability" in error_msg and "zone" in error_msg :
378+ click .echo (
379+ f"Error: Availability zone '{ availability_zone } ' is not valid." ,
380+ err = True ,
201381 )
382+ try :
383+ valid_zones = client .endpoints .list_avzones ()
384+ if valid_zones :
385+ click .echo ("\n Available zones:" , err = True )
386+ for zone in sorted (valid_zones ):
387+ click .echo (f" { zone } " , err = True )
388+ except Exception :
389+ pass
202390 else :
203391 print_api_error (e )
204392 sys .exit (1 )
0 commit comments