Skip to content

Commit a62ea5a

Browse files
atihkin and cursoragent committed
feat(cli): improve error messages for endpoint creation failures
- Add specific error messages for hardware configuration issues:
  - Hardware not compatible with model
  - Hardware unavailable (no capacity)
  - Insufficient capacity for replicas
  - Hardware available but config not supported (suggests toggling speculative decoding)
- Add client-side validation with clear errors:
  - min/max replicas: must be non-negative, min <= max
  - gpu-count: must be 1, 2, 4, or 8
  - availability-zone: validates against available zones
- Improve API error handling:
  - Model not found: suggests checking model name
  - Endpoint not found: suggests listing endpoints
  - Permission denied: clear access error message

Fixes MLE-3108

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent e96624e commit a62ea5a

File tree

1 file changed

+200
-12
lines changed

1 file changed

+200
-12
lines changed

src/together/cli/api/endpoints.py

Lines changed: 200 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,30 @@ def print_endpoint(
4545

4646
def print_api_error(
    e: InvalidRequestError,
    endpoint_id: str | None = None,
) -> None:
    """Print a user-facing explanation of an API error to stderr.

    The API's error message is matched case-insensitively against a few known
    keywords to choose a friendlier explanation; any message that matches none
    of them is echoed as-is.

    Args:
        e: The API error; its ``api_response.message`` is inspected.
        endpoint_id: ID of the endpoint the failed request targeted, if known.
            Only used to name the endpoint in the "not found" message.
    """
    error_details = e.api_response.message
    # Lower-case once so every keyword check below is case-insensitive; an
    # empty/missing message becomes "" and therefore matches no keyword.
    error_lower = error_details.lower() if error_details else ""

    if "credentials" in error_lower or "authentication" in error_lower:
        click.echo("Error: Invalid API key or authentication failed", err=True)
    elif "not found" in error_lower and "endpoint" in error_lower:
        # Name the endpoint only when an ID was supplied; interpolating an
        # empty placeholder would print "Endpoint  not found." (double space).
        if endpoint_id:
            click.echo(f"Error: Endpoint '{endpoint_id}' not found.", err=True)
        else:
            click.echo("Error: Endpoint not found.", err=True)
        click.echo(
            "The endpoint may have been deleted or the ID may be incorrect.",
            err=True,
        )
        click.echo(
            "Use 'together endpoints list --mine true' to see your endpoints.",
            err=True,
        )
    elif (
        "permission" in error_lower
        or "forbidden" in error_lower
        or "unauthorized" in error_lower
    ):
        click.echo("Error: You don't have permission to access this endpoint.", err=True)
        click.echo(
            "This endpoint may belong to another user or organization.",
            err=True,
        )
    else:
        click.echo(f"Error: {error_details}", err=True)
5874

@@ -65,7 +81,9 @@ def wrapper(*args: Any, **kwargs: Any) -> Any:
6581
try:
6682
return f(*args, **kwargs)
6783
except InvalidRequestError as e:
68-
print_api_error(e)
84+
# Try to extract endpoint_id from kwargs for better error messages
85+
endpoint_id = kwargs.get("endpoint_id")
86+
print_api_error(e, endpoint_id=endpoint_id)
6987
sys.exit(1)
7088
except Exception as e:
7189
click.echo(f"Error: An unexpected error occurred - {str(e)}", err=True)
@@ -81,6 +99,92 @@ def endpoints(ctx: click.Context) -> None:
8199
pass
82100

83101

102+
def _print_hardware_error(
    client: Together,
    model: str,
    hardware_id: str,
    gpu: str,
    gpu_count: int,
    *,
    speculative_decoding_enabled: bool = False,
) -> None:
    """Explain on stderr why the requested hardware could not be used.

    Prints a headline, then fetches the hardware options for ``model`` to
    report whether ``hardware_id`` is absent from them, lacks capacity, or is
    available but was still rejected. Unless the "available but rejected" case
    returns early, alternative options are then handed to
    ``_format_hardware_options``.

    Args:
        client: Client whose ``endpoints.list_hardware`` supplies the options.
        model: Model name the endpoint was requested for.
        hardware_id: Full hardware ID used in the failed request.
        gpu: Short GPU name; upper-cased for display.
        gpu_count: Number of GPUs requested; used for display only.
        speculative_decoding_enabled: Whether the failed request had
            speculative decoding turned on; selects which flag change is
            suggested when the hardware itself is available.
    """
    click.echo(
        f"Error: Cannot create endpoint with {gpu_count}x {gpu.upper()} for model '{model}'",
        err=True,
    )

    # Best effort: if the lookup itself fails, point at the CLI command that
    # lists hardware and stop.
    try:
        hardware_options = client.endpoints.list_hardware(model)
    except Exception:
        click.echo(
            "\nUse 'together endpoints hardware --model <model>' to see available options.",
            err=True,
        )
        return

    # Locate the first option whose ID matches the requested hardware.
    matched = None
    for candidate in hardware_options:
        if candidate.id == hardware_id:
            matched = candidate
            break

    if matched is None:
        click.echo(
            f"\nThe hardware configuration '{hardware_id}' is not compatible with this model.",
            err=True,
        )
    elif matched.availability:
        status = matched.availability.status
        if status == "unavailable":
            click.echo(
                f"\nThe {gpu_count}x {gpu.upper()} configuration is currently unavailable. "
                "This hardware type has no available capacity at this time.",
                err=True,
            )
        elif status == "insufficient":
            click.echo(
                f"\nThe {gpu_count}x {gpu.upper()} configuration has insufficient capacity. "
                "Not enough GPUs available for the requested number of replicas.",
                err=True,
            )
        elif status == "available":
            # The hardware is fine, so suggest flipping the speculative
            # decoding flag and skip the alternatives listing entirely.
            suggestion = (
                "\nHardware is available but this configuration is not supported. "
                "Try adding --no-speculative-decoding."
                if speculative_decoding_enabled
                else "\nHardware is available but this configuration is not supported. "
                "Try removing --no-speculative-decoding to enable speculative decoding."
            )
            click.echo(suggestion, err=True)
            return

    # Collect the options currently reported as available.
    ready_options = []
    for option in hardware_options:
        if option.availability is not None and option.availability.status == "available":
            ready_options.append(option)

    if not ready_options:
        click.echo(
            "\nNo hardware is currently available for this model. Please try again later.",
            err=True,
        )
        click.echo("\nAll hardware options for this model:", err=True)
        click.echo("", err=True)
        _format_hardware_options(hardware_options, show_availability=True)
        return

    click.echo("\nAvailable hardware options for this model:", err=True)
    click.echo("", err=True)
    _format_hardware_options(ready_options, show_availability=True)
186+
187+
84188
@endpoints.command()
85189
@click.option(
86190
"--model",
@@ -162,6 +266,51 @@ def create(
162266
wait: bool,
163267
) -> None:
164268
"""Create a new dedicated inference endpoint."""
269+
# Client-side validation for replicas
270+
if min_replicas < 0:
271+
click.echo(
272+
f"Error: --min-replicas must be non-negative, got {min_replicas}", err=True
273+
)
274+
sys.exit(1)
275+
if max_replicas < 0:
276+
click.echo(
277+
f"Error: --max-replicas must be non-negative, got {max_replicas}", err=True
278+
)
279+
sys.exit(1)
280+
if min_replicas > max_replicas:
281+
click.echo(
282+
f"Error: --min-replicas ({min_replicas}) cannot be greater than "
283+
f"--max-replicas ({max_replicas})",
284+
err=True,
285+
)
286+
sys.exit(1)
287+
288+
# Validate GPU count
289+
valid_gpu_counts = [1, 2, 4, 8]
290+
if gpu_count not in valid_gpu_counts:
291+
click.echo(
292+
f"Error: --gpu-count must be one of {valid_gpu_counts}, got {gpu_count}",
293+
err=True,
294+
)
295+
sys.exit(1)
296+
297+
# Validate availability zone if specified
298+
if availability_zone:
299+
try:
300+
valid_zones = client.endpoints.list_avzones()
301+
if availability_zone not in valid_zones:
302+
click.echo(
303+
f"Error: Invalid availability zone '{availability_zone}'", err=True
304+
)
305+
if valid_zones:
306+
click.echo("Available zones:", err=True)
307+
for zone in sorted(valid_zones):
308+
click.echo(f" {zone}", err=True)
309+
sys.exit(1)
310+
except Exception:
311+
# If we can't fetch zones, let the API validate it
312+
pass
313+
165314
# Map GPU types to their full hardware ID names
166315
gpu_map = {
167316
"b200": "nvidia_b200_180gb_sxm",
@@ -189,16 +338,55 @@ def create(
189338
availability_zone=availability_zone,
190339
)
191340
except InvalidRequestError as e:
341+
error_msg = str(e.args[0]).lower() if e.args else ""
192342
if (
193-
"check the hardware api" in str(e.args[0]).lower()
194-
or "invalid hardware provided" in str(e.args[0]).lower()
195-
or "the selected configuration" in str(e.args[0]).lower()
343+
"check the hardware api" in error_msg
344+
or "invalid hardware provided" in error_msg
345+
or "the selected configuration" in error_msg
346+
):
347+
# speculative decoding is enabled when --no-speculative-decoding is NOT passed
348+
speculative_decoding_enabled = not no_speculative_decoding
349+
_print_hardware_error(
350+
client,
351+
model,
352+
hardware_id,
353+
gpu,
354+
gpu_count,
355+
speculative_decoding_enabled=speculative_decoding_enabled,
356+
)
357+
elif "model" in error_msg and (
358+
"not found" in error_msg
359+
or "invalid" in error_msg
360+
or "does not exist" in error_msg
361+
or "not supported" in error_msg
196362
):
197-
click.secho("Invalid hardware selected.", fg="red", err=True)
198-
click.echo("\nAvailable hardware options:")
199-
fetch_and_print_hardware_options(
200-
client=client, model=model, print_json=False, available=True
363+
click.echo(
364+
f"Error: Model '{model}' was not found or is not available for "
365+
"dedicated endpoints.",
366+
err=True,
367+
)
368+
click.echo(
369+
"Please check that the model name is correct and that it supports "
370+
"dedicated endpoint deployment.",
371+
err=True,
372+
)
373+
click.echo(
374+
"You can browse available models at: https://api.together.ai/models",
375+
err=True,
376+
)
377+
elif "availability" in error_msg and "zone" in error_msg:
378+
click.echo(
379+
f"Error: Availability zone '{availability_zone}' is not valid.",
380+
err=True,
201381
)
382+
try:
383+
valid_zones = client.endpoints.list_avzones()
384+
if valid_zones:
385+
click.echo("\nAvailable zones:", err=True)
386+
for zone in sorted(valid_zones):
387+
click.echo(f" {zone}", err=True)
388+
except Exception:
389+
pass
202390
else:
203391
print_api_error(e)
204392
sys.exit(1)

0 commit comments

Comments
 (0)