Commits
43 commits
1c120bf
last todos
sfierro Feb 20, 2026
2b69ecb
Merge pull request #1067 from Kiln-AI/sfierro/last-changes
scosman Feb 20, 2026
60e7a48
version bump for release
scosman Feb 20, 2026
e80c067
version bump for release (lockfile)
scosman Feb 20, 2026
18a0b60
refactor: autosave_runs in memory (not persisted)
leonardmq Feb 24, 2026
f5af283
cr: explicitly prevent dumping in memory to yaml
leonardmq Feb 24, 2026
ec915f1
refactor: combine in memory and persisted in settings
leonardmq Feb 24, 2026
69bb336
cr feedback (mostly about locking)
leonardmq Feb 24, 2026
c8091a3
cr lock in setattr
leonardmq Feb 24, 2026
ce6b564
telemetry docs
scosman Feb 24, 2026
07e0fe3
Merge pull request #1083 from Kiln-AI/scosman/telemetry_docs_2
scosman Feb 24, 2026
cf56e58
Allow tool server edit when it fails to connect
chiang-daniel Feb 24, 2026
905dee5
CR from Leonard
chiang-daniel Feb 25, 2026
05f9d4d
Merge pull request #1084 from Kiln-AI/dchiang/fix-tool-edit-button
chiang-daniel Feb 25, 2026
f04a785
update MCP error stack
chiang-daniel Feb 26, 2026
a520b78
use markdown
chiang-daniel Feb 26, 2026
d802886
better description
chiang-daniel Feb 26, 2026
2aee473
CR
chiang-daniel Feb 26, 2026
f086060
CR from Leonard
chiang-daniel Feb 26, 2026
cd21709
clean up
chiang-daniel Feb 26, 2026
6d0d3c9
update tests to use custom_error
chiang-daniel Feb 26, 2026
544ee18
Merge pull request #1086 from Kiln-AI/dchiang/KIL-415/mcp-error-cleanup
chiang-daniel Feb 26, 2026
c4326d8
chore: update litellm
leonardmq Feb 27, 2026
20d2450
Merge pull request #1090 from Kiln-AI/leonard/kil-439-chore-update-li…
leonardmq Feb 27, 2026
a0d1646
save tool id for tool use spec
sfierro Feb 27, 2026
33fd958
Merge pull request #1097 from Kiln-AI/sfierro/tool-id-bug
sfierro Feb 27, 2026
57896a8
Proof of concept streaming API
scosman Feb 18, 2026
3d6ced2
test: paid integration test for streaming
leonardmq Mar 3, 2026
1464fb8
test: add test for session + streaming together
leonardmq Mar 3, 2026
dee10b3
Update libs/core/kiln_ai/adapters/model_adapters/test_litellm_adapter…
leonardmq Mar 3, 2026
3fabd00
Merge pull request #1082 from Kiln-AI/leonard/kil-428-make-autosave_r…
leonardmq Mar 6, 2026
c520e27
fix: pin uv tools in CI and checks
leonardmq Mar 6, 2026
1eb0fe6
fix: pinned uv run in another workflow and mcp hooks
leonardmq Mar 6, 2026
f18aa3b
Merge pull request #1104 from Kiln-AI/leonard/kil-442-fix-pin-uv-tool…
leonardmq Mar 6, 2026
456088d
refactor: stream with support for AI SDK (with tool events) and OpenA…
leonardmq Mar 8, 2026
0ea65b4
refactor: ai sdk events as pydantic models
leonardmq Mar 8, 2026
a98d886
fix: model_dump implementation and remove to_see to leave transport s…
leonardmq Mar 8, 2026
e989dca
fix: should reset before next round of toolcalls
leonardmq Mar 8, 2026
11710f2
refactor: take in a trace instead of a task_run for session continuation
leonardmq Mar 8, 2026
3ad6a27
refactor: remove ability to continue task run at api level
leonardmq Mar 8, 2026
3f08ed5
Merge branch 'main' of github.com:Kiln-AI/Kiln into leonard/kil-420-a…
leonardmq Mar 8, 2026
4d8e99f
refactor: wrap stream iterators to allow exposing task run at the end
leonardmq Mar 8, 2026
eb537ed
fix: remove autosave_runs hardcoded
leonardmq Mar 8, 2026
2 changes: 1 addition & 1 deletion .github/workflows/build_and_test.yml
@@ -37,7 +37,7 @@ jobs:
run: uv run python3 -m pytest --runslow .

- name: Check Python Types
run: uvx ty check
run: uv run ty check

- name: Build Core
run: uv build
6 changes: 3 additions & 3 deletions .github/workflows/format_and_lint.yml
@@ -40,12 +40,12 @@ jobs:

- name: Lint with ruff
run: |
uvx ruff check
uv run ruff check

- name: Format with ruff
run: |
uvx ruff format --check .
uv run ruff format --check .

- name: Typecheck with ty
run: |
uvx ty check
uv run ty check
2 changes: 2 additions & 0 deletions .gitignore
@@ -21,3 +21,5 @@ libs/server/build
dist/

.mcp.json

test_output/
2 changes: 1 addition & 1 deletion app/desktop/WinInnoSetup.iss
@@ -3,7 +3,7 @@

#define MyAppPath "build\dist\Kiln"
#define MyAppName "Kiln"
#define MyAppVersion "0.24.0"
#define MyAppVersion "0.25.0"
#define MyAppPublisher "Chesterfield Laboratories Inc"
#define MyAppURL "https://kiln.tech"
#define MyAppExeName "Kiln.exe"
2 changes: 1 addition & 1 deletion app/desktop/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "kiln-studio-desktop"
version = "0.24.0"
version = "0.25.0"
description = "The Kiln Desktop App. Download from https://kiln.tech"
requires-python = ">=3.10"
dependencies = [
20 changes: 11 additions & 9 deletions app/desktop/studio_server/test_copilot_api.py
@@ -13,11 +13,13 @@
from app.desktop.studio_server.copilot_api import connect_copilot_api
from fastapi import FastAPI
from fastapi.testclient import TestClient
from kiln_server.custom_errors import connect_custom_errors


@pytest.fixture
def app():
app = FastAPI()
connect_custom_errors(app)
connect_copilot_api(app)
return app

@@ -123,7 +125,7 @@ def test_clarify_spec_no_api_key(self, client, clarify_spec_input):

response = client.post("/api/copilot/clarify_spec", json=clarify_spec_input)
assert response.status_code == 401
assert "API key not configured" in response.json()["detail"]
assert "API key not configured" in response.json()["message"]

def test_clarify_spec_success(self, client, clarify_spec_input, mock_api_key):
mock_output = MagicMock(spec=ClarifySpecOutput)
@@ -194,7 +196,7 @@ def test_clarify_spec_no_response(self, client, clarify_spec_input, mock_api_key
):
response = client.post("/api/copilot/clarify_spec", json=clarify_spec_input)
assert response.status_code == 500
assert "Failed to analyze spec" in response.json()["detail"]
assert "Failed to analyze spec" in response.json()["message"]

def test_clarify_spec_validation_error(
self, client, clarify_spec_input, mock_api_key
@@ -210,7 +212,7 @@ def test_clarify_spec_validation_error(
):
response = client.post("/api/copilot/clarify_spec", json=clarify_spec_input)
assert response.status_code == 422
assert "Validation error from server" in response.json()["detail"]
assert "Validation error from server" in response.json()["message"]


class TestRefineSpec:
@@ -223,7 +225,7 @@ def test_refine_spec_no_api_key(self, client, refine_spec_input):

response = client.post("/api/copilot/refine_spec", json=refine_spec_input)
assert response.status_code == 401
assert "API key not configured" in response.json()["detail"]
assert "API key not configured" in response.json()["message"]

def test_refine_spec_success(self, client, refine_spec_input, mock_api_key):
mock_output = MagicMock(spec=RefineSpecApiOutput)
@@ -259,7 +261,7 @@ def test_refine_spec_no_response(self, client, refine_spec_input, mock_api_key):
):
response = client.post("/api/copilot/refine_spec", json=refine_spec_input)
assert response.status_code == 500
assert "Failed to refine spec" in response.json()["detail"]
assert "Failed to refine spec" in response.json()["message"]

def test_refine_spec_validation_error(
self, client, refine_spec_input, mock_api_key
@@ -275,7 +277,7 @@ def test_refine_spec_validation_error(
):
response = client.post("/api/copilot/refine_spec", json=refine_spec_input)
assert response.status_code == 422
assert "Validation error from server" in response.json()["detail"]
assert "Validation error from server" in response.json()["message"]


class TestGenerateBatch:
@@ -290,7 +292,7 @@ def test_generate_batch_no_api_key(self, client, generate_batch_input):
"/api/copilot/generate_batch", json=generate_batch_input
)
assert response.status_code == 401
assert "API key not configured" in response.json()["detail"]
assert "API key not configured" in response.json()["message"]

def test_generate_batch_success(self, client, generate_batch_input, mock_api_key):
mock_output = MagicMock(spec=GenerateBatchOutput)
@@ -328,7 +330,7 @@ def test_generate_batch_no_response(
"/api/copilot/generate_batch", json=generate_batch_input
)
assert response.status_code == 500
assert "Failed to generate synthetic data" in response.json()["detail"]
assert "Failed to generate synthetic data" in response.json()["message"]

def test_generate_batch_validation_error(
self, client, generate_batch_input, mock_api_key
@@ -346,4 +348,4 @@ def test_generate_batch_validation_error(
"/api/copilot/generate_batch", json=generate_batch_input
)
assert response.status_code == 422
assert "Validation error from server" in response.json()["detail"]
assert "Validation error from server" in response.json()["message"]
2 changes: 2 additions & 0 deletions app/desktop/studio_server/test_data_gen_api.py
@@ -3,6 +3,7 @@
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from kiln_server.custom_errors import connect_custom_errors
from kiln_ai.datamodel import (
DataSource,
DataSourceType,
@@ -31,6 +32,7 @@
@pytest.fixture
def app():
app = FastAPI()
connect_custom_errors(app)
connect_data_gen_api(app)
return app

28 changes: 15 additions & 13 deletions app/desktop/studio_server/test_eval_api.py
@@ -15,6 +15,7 @@
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.testclient import TestClient
from kiln_server.custom_errors import connect_custom_errors
from kiln_ai.adapters.ml_model_list import ModelProviderName
from kiln_ai.datamodel import (
DataSource,
@@ -55,6 +56,7 @@
@pytest.fixture
def app():
app = FastAPI()
connect_custom_errors(app)
connect_evals_api(app)
return app

@@ -189,7 +191,7 @@ def test_get_eval_not_found(client, mock_task, mock_task_from_id):
response = client.get("/api/projects/project1/tasks/task1/eval/non_existent")

assert response.status_code == 404
assert response.json()["detail"] == "Eval not found. ID: non_existent"
assert response.json()["message"] == "Eval not found. ID: non_existent"


@pytest.fixture
@@ -513,7 +515,7 @@ async def test_run_eval_config_no_run_configs_error(

assert response.status_code == 400
assert (
response.json()["detail"]
response.json()["message"]
== "No run config ids provided. At least one run config id is required."
)

@@ -785,7 +787,7 @@ def test_update_run_config_prompt_name_no_prompt(
json={"prompt_name": "New Name"},
)
assert response.status_code == 400
assert "no frozen prompt" in response.json()["detail"].lower()
assert "no frozen prompt" in response.json()["message"].lower()


@pytest.fixture
@@ -1354,7 +1356,7 @@ def test_delete_eval_not_found(client):

# Verify the response
assert response.status_code == 404
assert response.json()["detail"] == "Eval not found. ID: nonexistent_eval"
assert response.json()["message"] == "Eval not found. ID: nonexistent_eval"


async def test_create_eval_then_delete_on_spec_failure(
@@ -1477,7 +1479,7 @@ def test_update_eval_train_set_filter_id_when_already_set(
assert response.status_code == 400
assert (
"Train set filter is already set and cannot be changed"
in response.json()["detail"]
in response.json()["message"]
)


@@ -1527,7 +1529,7 @@ def test_update_eval_not_found(client):
)

assert response.status_code == 404
assert "Eval not found" in response.json()["detail"]
assert "Eval not found" in response.json()["message"]


def test_update_eval_empty_request(client, mock_task_from_id, mock_eval, mock_task):
@@ -1759,7 +1761,7 @@ async def test_get_eval_progress_not_found(client, mock_task_from_id, mock_task)

# Verify the response
assert response.status_code == 404
assert response.json()["detail"] == "Eval not found. ID: non_existent"
assert response.json()["message"] == "Eval not found. ID: non_existent"
mock_eval_from_id.assert_called_once_with("project1", "task1", "non_existent")


@@ -1810,7 +1812,7 @@ async def test_set_current_eval_config_not_found(

# Verify the response
assert response.status_code == 400
assert response.json()["detail"] == "Eval config not found."
assert response.json()["message"] == "Eval config not found."


@pytest.mark.parametrize(
@@ -1901,7 +1903,7 @@ async def test_create_task_run_config_invalid_temperature_values(
},
)
assert response.status_code == 422
error_detail = response.json()["detail"]
error_detail = response.json()["message"]
assert "temperature must be between 0 and 2" in str(error_detail)

# Test temperature above 2
@@ -1919,7 +1921,7 @@ async def test_create_task_run_config_invalid_temperature_values(
},
)
assert response.status_code == 422
error_detail = response.json()["detail"]
error_detail = response.json()["message"]
assert "temperature must be between 0 and 2" in str(error_detail)


@@ -1945,7 +1947,7 @@ async def test_create_task_run_config_invalid_top_p_values(
},
)
assert response.status_code == 422
error_detail = response.json()["detail"]
error_detail = response.json()["message"]
assert "top_p must be between 0 and 1" in str(error_detail)

# Test top_p above 1
@@ -1963,7 +1965,7 @@ async def test_create_task_run_config_invalid_top_p_values(
},
)
assert response.status_code == 422
error_detail = response.json()["detail"]
error_detail = response.json()["message"]
assert "top_p must be between 0 and 1" in str(error_detail)


@@ -2226,7 +2228,7 @@ def test_get_eval_configs_score_summary_no_filter_id(

assert response.status_code == 400
assert (
response.json()["detail"]
response.json()["message"]
== "No eval configs filter id set, cannot get eval configs score summary."
)
mock_eval_from_id.assert_called_once_with("project1", "task1", "eval1")