20 changes: 6 additions & 14 deletions examples/agents/simple_chat.py
@@ -9,7 +9,7 @@
from llama_stack_client import LlamaStackClient, Agent, AgentEventLogger
from termcolor import colored

from .utils import check_model_is_available, get_any_available_model
from .utils import check_model_is_available, get_any_available_chat_model


def main(host: str, port: int, model_id: str | None = None):
@@ -27,14 +27,8 @@ def main(host: str, port: int, model_id: str | None = None):
provider_data={"tavily_search_api_key": os.getenv("TAVILY_SEARCH_API_KEY")},
)

available_shields = [shield.identifier for shield in client.shields.list()]
if not available_shields:
print(colored("No available shields. Disabling safety.", "yellow"))
else:
print(f"Available shields found: {available_shields}")

if model_id is None:
model_id = get_any_available_model(client)
model_id = get_any_available_chat_model(client)
if model_id is None:
return
else:
@@ -47,10 +41,8 @@
client,
model=model_id,
instructions="",
tools=["builtin::websearch"],
input_shields=available_shields,
output_shields=available_shields,
enable_session_persistence=False,
# OpenAI Responses tool schema requires a type discriminator.
tools=[{"type": "web_search"}],
)
user_prompts = [
"Hello",
@@ -65,8 +57,8 @@
session_id=session_id,
)

for log in AgentEventLogger().log(response):
log.print()
for printable in AgentEventLogger().log(response):
print(printable, end="", flush=True)


if __name__ == "__main__":
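Taken together, the updated example reduces to the flow below. This is only a condensed sketch run as a module inside examples.agents like the original script: the base_url, the create_session/create_turn calls, and the stream flag follow the usual llama-stack-apps agent pattern rather than the visible diff, so treat those names as assumptions.

import os

from llama_stack_client import Agent, AgentEventLogger, LlamaStackClient

from .utils import get_any_available_chat_model

# Provider data carries the Tavily key for the builtin web search tool.
client = LlamaStackClient(
    base_url="http://localhost:8321",
    provider_data={"tavily_search_api_key": os.getenv("TAVILY_SEARCH_API_KEY")},
)

# Pick an LLM that actually answers chat completions (see utils.py below).
model_id = get_any_available_chat_model(client)
if model_id is None:
    raise SystemExit(1)

# The OpenAI Responses tool schema replaces the old "builtin::websearch" string.
agent = Agent(
    client,
    model=model_id,
    instructions="",
    tools=[{"type": "web_search"}],
)

session_id = agent.create_session("simple-chat-demo")  # assumed helper name
response = agent.create_turn(
    messages=[{"role": "user", "content": "Hello"}],
    session_id=session_id,
    stream=True,
)

# The event logger now yields printable chunks instead of objects with .print().
for printable in AgentEventLogger().log(response):
    print(printable, end="", flush=True)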
65 changes: 61 additions & 4 deletions examples/agents/utils.py
@@ -2,11 +2,36 @@
from termcolor import colored


def _get_model_type(model) -> str | None:
    for metadata_attr in ("custom_metadata", "metadata"):
        metadata = getattr(model, metadata_attr, None)
        if isinstance(metadata, dict):
            value = metadata.get("model_type") or metadata.get("type")
            if isinstance(value, str):
                return value
    return None


def _is_llm_model(model) -> bool:
    model_type = _get_model_type(model)
    # If the client schema doesn't expose type fields, assume LLM.
    return model_type is None or model_type == "llm"


def _get_model_id(model) -> str | None:
    for attr in ("identifier", "model_id", "id", "name"):
        value = getattr(model, attr, None)
        if isinstance(value, str):
            return value
    return None


def check_model_is_available(client: LlamaStackClient, model: str):
available_models = [
model.identifier
model_id
for model in client.models.list()
if model.model_type == "llm" and "guard" not in model.identifier
for model_id in [_get_model_id(model)]
if model_id and _is_llm_model(model) and "guard" not in model_id
]

if model not in available_models:
@@ -23,12 +48,44 @@ def check_model_is_available(client: LlamaStackClient, model: str):

def get_any_available_model(client: LlamaStackClient):
available_models = [
model.identifier
model_id
for model in client.models.list()
if model.model_type == "llm" and "guard" not in model.identifier
for model_id in [_get_model_id(model)]
if model_id and _is_llm_model(model) and "guard" not in model_id
]
if not available_models:
print(colored("No available models.", "red"))
return None

return available_models[0]


def can_model_chat(client: LlamaStackClient, model_id: str) -> bool:
Review comment (Contributor):
Do we have to run a chat completion to see if the model supports chat? We already have the model type: https://github.com/llamastack/llama-stack/blob/ffa98595e696c7ab3e0e933d0ed75375ee1d7b84/src/llama_stack_api/models/models.py#L23

Reply (Author):
@raghotham I can see that some models with the llm type still do not support chat, like ollama/all-minilm:latest.

(llama-stack) (base) gualiu@gualiu-mac llama-stack % curl -s http://localhost:8321/v1/models \
  | jq '.data[] | select(.id=="ollama/all-minilm:latest")'
{
  "id": "ollama/all-minilm:latest",
  "object": "model",
  "created": 1769569923,
  "owned_by": "llama_stack",
  "custom_metadata": {
    "model_type": "llm",
    "provider_id": "ollama",
    "provider_resource_id": "all-minilm:latest"
  }
}

But this model does not support chat.

(stack) gualiu@gualiu-mac llama-stack-apps % python -m examples.agents.simple_chat --host localhost --port 8321 --model_id ollama/all-minilm:latest
INFO:httpx:HTTP Request: GET http://localhost:8321/v1/models "HTTP/1.1 200 OK"
Using model: ollama/all-minilm:latest
INFO:httpx:HTTP Request: POST http://localhost:8321/v1/conversations "HTTP/1.1 200 OK"
User> Hello
INFO:httpx:HTTP Request: POST http://localhost:8321/v1/responses "HTTP/1.1 200 OK"
🤔
❌ Turn failed: Error code: 400 - {'error': {'message': '"all-minilm:latest" does not support chat', 'type': 'api_error', 'param': None, 'code': None}}
User> Search web for which players played in the winning team of the NBA western conference semifinals of 2024
INFO:httpx:HTTP Request: POST http://localhost:8321/v1/responses "HTTP/1.1 200 OK"
🤔
❌ Turn failed: Error code: 400 - {'error': {'message': '"all-minilm:latest" does not support chat', 'type': 'api_error', 'param': None, 'code': None}}

I think that besides model_type, we may need to add a new field named capability for the model; the capability could be chat, completion, tool_calling, etc. Comments?

    # Lightweight probe to ensure the model supports chat completions.
    try:
        client.chat.completions.create(
            model=model_id,
            messages=[{"role": "user", "content": "ping"}],
            max_tokens=1,
        )
    except Exception:
        return False
    return True

def get_any_available_chat_model(client: LlamaStackClient):
    available_models = [
        model_id
        for model in client.models.list()
        for model_id in [_get_model_id(model)]
        if model_id and _is_llm_model(model) and "guard" not in model_id
    ]
    if not available_models:
        print(colored("No available models.", "red"))
        return None

    for model_id in available_models:
        if can_model_chat(client, model_id):
            return model_id

    print(colored("No available chat-capable models.", "red"))
    return None
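If the capability field proposed in the review discussion above were ever added to the model metadata, the chat-completion probe could be replaced by a plain metadata check. The sketch below is purely hypothetical: neither the capability key nor model_supports_chat exists in llama-stack today; it only mirrors how _get_model_type reads custom_metadata above.

def _model_capabilities(model) -> set[str]:
    # Hypothetical "capability" list read from the same metadata dicts
    # that _get_model_type inspects; no such field exists yet.
    for metadata_attr in ("custom_metadata", "metadata"):
        metadata = getattr(model, metadata_attr, None)
        if isinstance(metadata, dict):
            value = metadata.get("capability")
            if isinstance(value, (list, tuple, set)):
                return {str(item) for item in value}
    return set()


def model_supports_chat(model) -> bool:
    # Hypothetical replacement for the per-model chat-completion probe.
    return "chat" in _model_capabilities(model)

Until something like that lands, the probe in can_model_chat above remains the fallback, at the cost of one tiny chat completion per candidate model.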