diff --git a/app/desktop/studio_server/finetune_api.py b/app/desktop/studio_server/finetune_api.py index aa5b69016..eb8cb35b2 100644 --- a/app/desktop/studio_server/finetune_api.py +++ b/app/desktop/studio_server/finetune_api.py @@ -5,6 +5,10 @@ import httpx from fastapi import FastAPI, HTTPException, Query from fastapi.responses import StreamingResponse +from kiln_ai.adapters.adapter_registry import ( + load_skills_for_task, + load_skills_from_tool_ids, +) from kiln_ai.adapters.fine_tune.base_finetune import FineTuneParameter, FineTuneStatus from kiln_ai.adapters.fine_tune.dataset_formatter import ( DatasetFormat, @@ -18,10 +22,6 @@ ModelProviderName, built_in_models, ) -from kiln_ai.adapters.adapter_registry import ( - load_skills_for_task, - load_skills_from_tool_ids, -) from kiln_ai.adapters.prompt_builders import ( chain_of_thought_prompt, prompt_builder_from_id, @@ -200,7 +200,8 @@ def compute_finetune_tag_info( high_quality_count: Dict[str, int] = {} reasoning_and_high_quality_count: Dict[str, int] = {} - required_tools_set = set(tool_filter) if tool_filter else None + # None means no filter; [] means explicitly match runs with no tools/skills. + required_tools_set = None if tool_filter is None else set(tool_filter) for sample in task.runs(readonly=True): # filter by tools if provided @@ -376,7 +377,14 @@ async def finetune_dataset_info( project_id: str, task_id: str, tool_ids: Annotated[list[str] | None, Query()] = None, + empty_tool_filter: bool = False, ) -> FinetuneDatasetInfo: + # In the fine-tune UI, "no tools/skills selected" should mean `tool_ids=[]`, + # but `openapi-fetch` omits empty arrays, so we recover that state from + # `empty_tool_filter=true`. + if empty_tool_filter and tool_ids is None: + tool_ids = [] + task = task_from_id(project_id, task_id) # Only include datasets that is part of a finetune. # Orphan datasets are created when user creates a dataset but didn't create a finetune. @@ -394,13 +402,20 @@ async def finetune_dataset_info( eligible_finetune_tags = compute_finetune_tag_info(task, tool_filter=tool_ids) eligible_datasets = existing_datasets - if tool_ids: + # Only filter datasets when the caller provided a tool/skill selection. + # `tool_ids=[]` is a real filter meaning "match datasets with no tools/skills". + if tool_ids is not None: required_tools_set = set(tool_ids) - eligible_datasets = [ - dataset - for dataset in existing_datasets - if set(dataset.tool_info().tools) == required_tools_set - ] + eligible_datasets = [] + for dataset in existing_datasets: + tool_info = dataset.tool_info() + # Reusable datasets must have a uniform tool/skill set. + # `tool_info.tools=None` means the dataset mixes different tool/skill selections. 
+ if ( + tool_info.tools is not None + and set(tool_info.tools) == required_tools_set + ): + eligible_datasets.append(dataset) return FinetuneDatasetInfo( existing_datasets=existing_datasets, @@ -528,6 +543,11 @@ async def download_dataset_jsonl( ) tool_info = dataset.tool_info() + if tool_info.tools is None: + raise HTTPException( + status_code=400, + detail="Dataset contains mixed tool/skill selections and cannot be exported", + ) skills_dict = load_skills_from_tool_ids(task, tool_info.tools) skills = list(skills_dict.values()) diff --git a/app/desktop/studio_server/test_finetune_api.py b/app/desktop/studio_server/test_finetune_api.py index 5e7632745..92fb502e6 100644 --- a/app/desktop/studio_server/test_finetune_api.py +++ b/app/desktop/studio_server/test_finetune_api.py @@ -180,6 +180,95 @@ def client(): return TestClient(app) +@pytest.fixture +def empty_task(tmp_path): + project = Project(name="Test Project", path=str(tmp_path / "project.kiln")) + project.save_to_file() + + task = Task( + name="Test Task", + instruction="This is a test instruction", + description="This is a test task", + parent=project, + ) + task.save_to_file() + return task + + +def create_synthetic_run( + task: Task, + *, + run_id: str, + name: str, + output_text: str, + created_by: str, + tags: list[str] | None = None, + tool_ids: list[str] | None = None, +) -> TaskRun: + output_source: dict[str, object] = { + "type": "synthetic", + "properties": { + "model_name": "gpt-4", + "model_provider": "openai", + "adapter_name": "test", + "prompt_id": "simple_prompt_builder", + }, + } + if tool_ids is not None: + output_source["run_config"] = KilnAgentRunConfigProperties( + model_name="gpt-4", + model_provider_name="openai", + prompt_id="simple_prompt_builder", + structured_output_mode="default", + tools_config=ToolsRunConfig(tools=tool_ids), + ) + + run = TaskRun( + id=run_id, + name=name, + parent=task, + tags=tags or ["fine_tune_tools"], + input=f"Test input {run_id}", + input_source={"type": "human", "properties": {"created_by": created_by}}, + output=TaskOutput( + output=output_text, + source=output_source, + ), + ) + run.save_to_file() + return run + + +def create_dataset_split( + task: Task, *, split_id: str, name: str, run_ids: list[str] +) -> DatasetSplit: + split = DatasetSplit( + id=split_id, + name=name, + split_contents={"train": run_ids}, + splits=AllSplitDefinition, + ) + split.parent = task + split.save_to_file() + return split + + +def create_finetune( + task: Task, *, finetune_id: str, name: str, split_id: str +) -> Finetune: + finetune = Finetune( + id=finetune_id, + name=name, + provider="openai", + base_model_id="model1", + dataset_split_id=split_id, + system_message="System prompt", + ) + finetune.parent = task + finetune.save_to_file() + return finetune + + def test_finetune_provider_model_defaults(): model = FinetuneProviderModel( name="Test Provider", @@ -1004,6 +1093,46 @@ def test_download_dataset_jsonl_invalid_split( ) +def test_download_dataset_jsonl_rejects_mismatched_tool_dataset( + client, mock_task_from_id_disk_backed, valid_download_params, empty_task +): + create_synthetic_run( + empty_task, + run_id="run_with_skill_a", + name="Run With Skill A", + output_text="Test output with skill A", + created_by="user1", + tool_ids=["kiln_tool::skill::skill_a"], + ) + create_synthetic_run( + empty_task, + run_id="run_with_skill_b", + name="Run With Skill B", + output_text="Test output with skill B", + created_by="user2", + tool_ids=["kiln_tool::skill::skill_b"], + ) + create_dataset_split( + 
empty_task, + split_id="split_mismatch", + name="Split Mismatch", + run_ids=["run_with_skill_a", "run_with_skill_b"], + ) + mock_task_from_id_disk_backed.return_value = empty_task + + valid_download_params["dataset_id"] = "split_mismatch" + response = client.get( + "/api/download_dataset_jsonl", + params=valid_download_params, + ) + + assert response.status_code == 400 + assert ( + response.json()["message"] + == "Dataset contains mixed tool/skill selections and cannot be exported" + ) + + def test_download_dataset_jsonl_with_prompt_builder( client, mock_task_from_id_disk_backed, @@ -1750,120 +1879,45 @@ def test_finetune_dataset_info_no_datasets_or_finetunes( mock_task_from_id_disk_backed.assert_called_once_with("project1", "task1") -@pytest.fixture -def task_with_tools(tmp_path): - project_path = tmp_path / "project.kiln" - project = Project(name="Test Project", path=str(project_path)) - project.save_to_file() - - task = Task( - name="Test Task", - instruction="This is a test instruction", - description="This is a test task", - parent=project, - ) - task.save_to_file() - - runs = [ - TaskRun( - id="run_with_tool_a", - name="Run with tool A", - parent=task, - tags=["fine_tune_tools"], - input="Test input with tool A", - input_source={"type": "human", "properties": {"created_by": "user1"}}, - output=TaskOutput( - output="Test output A", - source={ - "type": "synthetic", - "properties": { - "model_name": "gpt-4", - "model_provider": "openai", - "adapter_name": "test", - "prompt_id": "simple_prompt_builder", - }, - "run_config": KilnAgentRunConfigProperties( - model_name="gpt-4", - model_provider_name="openai", - prompt_id="simple_prompt_builder", - structured_output_mode="default", - tools_config=ToolsRunConfig( - tools=["mcp::remote::server_a::tool_a"] - ), - ), - }, - ), - ), - TaskRun( - id="run_with_tool_a_b", - name="Run with tool A and B", - parent=task, - tags=["fine_tune_tools"], - input="Test input with tool A and B", - input_source={"type": "human", "properties": {"created_by": "user2"}}, - output=TaskOutput( - output="Test output AB", - source={ - "type": "synthetic", - "properties": { - "model_name": "gpt-4", - "model_provider": "openai", - "adapter_name": "test", - "prompt_id": "simple_prompt_builder", - }, - "run_config": KilnAgentRunConfigProperties( - model_name="gpt-4", - model_provider_name="openai", - prompt_id="simple_prompt_builder", - structured_output_mode="default", - tools_config=ToolsRunConfig( - tools=[ - "mcp::remote::server_a::tool_a", - "mcp::remote::server_b::tool_b", - ] - ), - ), - }, - ), - ), - TaskRun( - id="run_no_tools", - name="Run without tools", - parent=task, - tags=["fine_tune_tools"], - input="Test input no tools", - input_source={"type": "human", "properties": {"created_by": "user3"}}, - output=TaskOutput( - output="Test output no tools", - source={ - "type": "synthetic", - "properties": { - "model_name": "gpt-4", - "model_provider": "openai", - "adapter_name": "test", - "prompt_id": "simple_prompt_builder", - }, - }, - ), - ), - ] - for run in runs: - run.save_to_file() - - return task - - @pytest.mark.parametrize( "tool_filter, expected_count", [ (None, 3), + ([], 1), (["mcp::remote::server_a::tool_a"], 1), (["mcp::remote::server_a::tool_a", "mcp::remote::server_b::tool_b"], 1), (["mcp::remote::server_x::tool_x"], 0), ], ) -def test_compute_finetune_tag_info(task_with_tools, tool_filter, expected_count): - result = compute_finetune_tag_info(task_with_tools, tool_filter=tool_filter) +def test_compute_finetune_tag_info(empty_task, tool_filter, 
expected_count): + create_synthetic_run( + empty_task, + run_id="run_with_tool_a", + name="Run with tool A", + output_text="Test output A", + created_by="user1", + tool_ids=["mcp::remote::server_a::tool_a"], + ) + create_synthetic_run( + empty_task, + run_id="run_with_tool_a_b", + name="Run with tool A and B", + output_text="Test output AB", + created_by="user2", + tool_ids=[ + "mcp::remote::server_a::tool_a", + "mcp::remote::server_b::tool_b", + ], + ) + create_synthetic_run( + empty_task, + run_id="run_no_tools", + name="Run without tools", + output_text="Test output no tools", + created_by="user3", + ) + + result = compute_finetune_tag_info(empty_task, tool_filter=tool_filter) if expected_count == 0: assert len(result) == 0 @@ -1873,6 +1927,149 @@ def test_compute_finetune_tag_info(task_with_tools, tool_filter, expected_count) assert result[0].count == expected_count +@pytest.mark.parametrize( + "tool_filter, expected_tag", + [ + ([], "fine_tune_without_skill"), + (["kiln_tool::skill::skill_a"], "fine_tune_with_skill"), + ], +) +def test_compute_finetune_tag_info_with_skill_filters( + empty_task, tool_filter, expected_tag +): + skill_a = "kiln_tool::skill::skill_a" + create_synthetic_run( + empty_task, + run_id="run_with_skill", + name="Run With Skill", + output_text="Test output with skill", + created_by="user1", + tags=["fine_tune_with_skill"], + tool_ids=[skill_a], + ) + create_synthetic_run( + empty_task, + run_id="run_without_skill", + name="Run Without Skill", + output_text="Test output without skill", + created_by="user2", + tags=["fine_tune_without_skill"], + ) + + result = compute_finetune_tag_info(empty_task, tool_filter=tool_filter) + + assert len(result) == 1 + assert result[0].tag == expected_tag + assert result[0].count == 1 + + +def test_finetune_dataset_info_filters_datasets_by_selected_tool( + client, + mock_task_from_id_disk_backed, + empty_task, +): + create_synthetic_run( + empty_task, + run_id="run_with_tool", + name="Run With Tool", + output_text="Test output with tool", + created_by="user1", + tool_ids=["mcp::remote::server_a::tool_a"], + ) + create_synthetic_run( + empty_task, + run_id="run_without_tool", + name="Run Without Tool", + output_text="Test output without tool", + created_by="user2", + ) + + create_dataset_split( + empty_task, + split_id="split_with_tool", + name="Split With Tool", + run_ids=["run_with_tool"], + ) + create_dataset_split( + empty_task, + split_id="split_without_tool", + name="Split Without Tool", + run_ids=["run_without_tool"], + ) + + create_finetune( + empty_task, + finetune_id="ft_with_tool", + name="ft_with_tool", + split_id="split_with_tool", + ) + create_finetune( + empty_task, + finetune_id="ft_without_tool", + name="ft_without_tool", + split_id="split_without_tool", + ) + + mock_task_from_id_disk_backed.return_value = empty_task + + response = client.get( + "/api/projects/project1/tasks/task1/finetune_dataset_info", + params={"tool_ids": ["mcp::remote::server_a::tool_a"]}, + ) + + assert response.status_code == 200 + data = response.json() + assert {ds["id"] for ds in data["eligible_datasets"]} == {"split_with_tool"} + assert len(data["eligible_finetune_tags"]) == 1 + assert data["eligible_finetune_tags"][0]["tag"] == "fine_tune_tools" + assert data["eligible_finetune_tags"][0]["count"] == 1 + + +def test_finetune_dataset_info_empty_tool_filter_excludes_mismatched_datasets( + client, + mock_task_from_id_disk_backed, + empty_task, +): + for run_id, skill_id in [ + ("run_with_skill_a", "kiln_tool::skill::skill_a"), + 
("run_with_skill_b", "kiln_tool::skill::skill_b"), + ]: + create_synthetic_run( + empty_task, + run_id=run_id, + name=run_id, + output_text=f"Test output {run_id}", + created_by="user1", + tool_ids=[skill_id], + ) + + create_dataset_split( + empty_task, + split_id="split_mismatch", + name="Split Mismatch", + run_ids=["run_with_skill_a", "run_with_skill_b"], + ) + create_finetune( + empty_task, + finetune_id="ft_mismatch", + name="Finetune Mismatch", + split_id="split_mismatch", + ) + + mock_task_from_id_disk_backed.return_value = empty_task + + # Mixed-skill datasets should not appear reusable when the fine-tune UI is + # filtering for no tools and no skills. + response = client.get( + "/api/projects/project1/tasks/task1/finetune_dataset_info", + params={"empty_tool_filter": True}, + ) + + assert response.status_code == 200 + data = response.json() + assert data["eligible_datasets"] == [] + + def test_system_message_from_request_with_skills(tmp_path): project = Project(name="Test Project", path=str(tmp_path / "project.kiln")) project.save_to_file() diff --git a/app/web_ui/src/lib/api_schema.d.ts b/app/web_ui/src/lib/api_schema.d.ts index e9c4bdd46..bbb643d4e 100644 --- a/app/web_ui/src/lib/api_schema.d.ts +++ b/app/web_ui/src/lib/api_schema.d.ts @@ -11182,6 +11182,7 @@ export interface operations { parameters: { query?: { tool_ids?: string[] | null; + empty_tool_filter?: boolean; }; header?: never; path: { diff --git a/app/web_ui/src/lib/ui/run_config_component/skills_selector.svelte b/app/web_ui/src/lib/ui/run_config_component/skills_selector.svelte index 88951a156..571989070 100644 --- a/app/web_ui/src/lib/ui/run_config_component/skills_selector.svelte +++ b/app/web_ui/src/lib/ui/run_config_component/skills_selector.svelte @@ -27,6 +27,7 @@ optional: true, } $: resolved = { ...default_settings, ...settings } + $: has_explicit_mandatory_skills = settings.mandatory_skills !== undefined const CREATE_NEW_SKILL = "__create_new_skill__" @@ -38,6 +39,18 @@ $: load_skills(project_id, task_id) + // When fine-tuning locks skills to an explicit empty set, clear any stale + // persisted selections so the bound value matches the disabled UI state. + $: if ( + resolved.disabled && + has_explicit_mandatory_skills && + Array.isArray(resolved.mandatory_skills) && + resolved.mandatory_skills.length === 0 && + skills.length > 0 + ) { + skills = [] + } + async function load_skills(project_id: string, task_id: string | null) { load_available_tools(project_id) diff --git a/app/web_ui/src/lib/ui/run_config_component/tools_selector.svelte b/app/web_ui/src/lib/ui/run_config_component/tools_selector.svelte index 5e9971c1e..46c6e0945 100644 --- a/app/web_ui/src/lib/ui/run_config_component/tools_selector.svelte +++ b/app/web_ui/src/lib/ui/run_config_component/tools_selector.svelte @@ -34,6 +34,7 @@ ...default_tools_selector_settings, ...settings, } + $: has_explicit_mandatory_tools = settings.mandatory_tools !== undefined onMount(async () => { await load_tools(project_id, task_id) @@ -42,6 +43,18 @@ // Load tools if project_id or task_id changes $: load_tools(project_id, task_id) + // When fine-tuning locks tools to an explicit empty set, clear any stale + // persisted selections so the bound value matches the disabled UI state. 
+ $: if ( + tools_selector_settings.disabled && + has_explicit_mandatory_tools && + Array.isArray(tools_selector_settings.mandatory_tools) && + tools_selector_settings.mandatory_tools.length === 0 && + tools.length > 0 + ) { + tools = [] + } + function is_tool_available(tool_id: string, project_id: string): boolean { const available = $available_tools[project_id] if (!available) return false diff --git a/app/web_ui/src/routes/(app)/dataset/[project_id]/[task_id]/add_data/+page.svelte b/app/web_ui/src/routes/(app)/dataset/[project_id]/[task_id]/add_data/+page.svelte index 47cb3ed12..15a55b366 100644 --- a/app/web_ui/src/routes/(app)/dataset/[project_id]/[task_id]/add_data/+page.svelte +++ b/app/web_ui/src/routes/(app)/dataset/[project_id]/[task_id]/add_data/+page.svelte @@ -139,8 +139,14 @@ if (tool_id) params.set("tool_id", tool_id) const splits_param = $page.url.searchParams.get("splits") if (splits_param) params.set("splits", splits_param) - const fine_tuning_tools = $page.url.searchParams.get("fine_tuning_tools") - if (fine_tuning_tools) params.set("fine_tuning_tools", fine_tuning_tools) + // Preserve an inherited empty fine-tuning tool/skill set when routing into SDG. + // `fine_tuning_tools=` means "locked to no tools/skills", not "param missing". + if ($page.url.searchParams.has("fine_tuning_tools")) { + params.set( + "fine_tuning_tools", + $page.url.searchParams.get("fine_tuning_tools") ?? "", + ) + } const query_string = params.toString() const url = `/generate/${$page.params.project_id}/${$page.params.task_id}?${query_string}` diff --git a/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/+page.svelte b/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/+page.svelte index 3db7d26c0..75881cd51 100644 --- a/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/+page.svelte +++ b/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/+page.svelte @@ -857,10 +857,7 @@ 0 - ? [...selected_tool_ids, ...selected_skill_ids] - : undefined} + required_tool_ids={[...selected_tool_ids, ...selected_skill_ids]} {saved_dataset_id} bind:selected_dataset /> diff --git a/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/select_finetune_dataset.svelte b/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/select_finetune_dataset.svelte index 441bebbe0..17f35f4e8 100644 --- a/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/select_finetune_dataset.svelte +++ b/app/web_ui/src/routes/(app)/fine_tune/[project_id]/[task_id]/create_finetune/select_finetune_dataset.svelte @@ -83,10 +83,14 @@ task_id, }, query: - required_tool_ids && required_tool_ids.length > 0 - ? { - tool_ids: required_tool_ids, - } + required_tool_ids !== undefined + ? required_tool_ids.length > 0 + ? 
{ + tool_ids: required_tool_ids, + } + : { + empty_tool_filter: true, + } : undefined, }, }, @@ -281,7 +285,7 @@ splits: "fine_tune_data:1.0", finetune_link: `/fine_tune/${project_id}/${task_id}/create_finetune`, }) - if (required_tool_ids && required_tool_ids.length > 0) { + if (required_tool_ids !== undefined) { params.set("fine_tuning_tools", required_tool_ids.join(",")) } let link = `/dataset/${project_id}/${task_id}/add_data?${params.toString()}` diff --git a/app/web_ui/src/routes/(app)/generate/[project_id]/[task_id]/synth/+page.svelte b/app/web_ui/src/routes/(app)/generate/[project_id]/[task_id]/synth/+page.svelte index fe6793a70..1299ad842 100644 --- a/app/web_ui/src/routes/(app)/generate/[project_id]/[task_id]/synth/+page.svelte +++ b/app/web_ui/src/routes/(app)/generate/[project_id]/[task_id]/synth/+page.svelte @@ -226,11 +226,19 @@ tool_id_param && tool_id_param.length > 0 ? tool_id_param : null const splitsParam = $page.url.searchParams.get("splits") const splits = get_splits_from_url_param(splitsParam) + // Distinguish "no inherited fine-tuning tools" from an inherited empty set. + // `fine_tuning_tools=` should round-trip as [] so SDG can lock to no tools/skills. + const has_fine_tuning_tools = + $page.url.searchParams.has("fine_tuning_tools") const fine_tuning_tools_param = $page.url.searchParams.get("fine_tuning_tools") - const fine_tuning_tools_list: string[] | null = fine_tuning_tools_param - ? fine_tuning_tools_param.split(",").filter((t) => t.length > 0) + const fine_tuning_tools_list: string[] | null = has_fine_tuning_tools + ? fine_tuning_tools_param?.split(",").filter((t) => t.length > 0) ?? [] : null + const fine_tuning_tools_key = + fine_tuning_tools_list === null + ? null + : fine_tuning_tools_list.join(",") const has_saved_state = $saved_state.gen_type !== null if (!has_saved_state) { @@ -252,7 +260,11 @@ $saved_state.gen_type === gen_type && $saved_state.template_id === template_id && $saved_state.eval_id === eval_id && - $saved_state.tool_id === tool_id + $saved_state.tool_id === tool_id && + ($saved_state.fine_tuning_tools === null + ? null + : $saved_state.fine_tuning_tools.join(",")) === + fine_tuning_tools_key ) { // Case 2: URL state matches saved state: load the saved state setup( @@ -741,19 +753,25 @@ let mandatory_tools: string[] | null = null let mandatory_skills: string[] | null = null + let fine_tuning_tools_locked = false + // Fine-tuning-derived tool/skill requirements are mandatory in SDG even when empty. + // An inherited empty set means "generate without tools/skills", not "unlocked". $: { const ft = $saved_state.fine_tuning_tools if ($saved_state.tool_id) { mandatory_tools = [$saved_state.tool_id] mandatory_skills = null - } else if (ft && ft.length > 0) { + fine_tuning_tools_locked = true + } else if (ft !== null) { const { tool_ids, skill_ids } = split_tool_and_skill_ids(ft) - mandatory_tools = tool_ids.length > 0 ? tool_ids : null - mandatory_skills = skill_ids.length > 0 ? 
skill_ids : null
+      mandatory_tools = tool_ids
+      mandatory_skills = skill_ids
+      fine_tuning_tools_locked = true
     } else {
       mandatory_tools = null
       mandatory_skills = null
+      fine_tuning_tools_locked = false
     }
   }
 
@@ -1190,6 +1208,7 @@
   {#if task}
     
+      
diff --git a/libs/core/kiln_ai/datamodel/dataset_split.py b/libs/core/kiln_ai/datamodel/dataset_split.py
--- a/libs/core/kiln_ai/datamodel/dataset_split.py
+++ b/libs/core/kiln_ai/datamodel/dataset_split.py
@@ ... @@ def compute_tool_info(...) -> DatasetToolInfo:
         elif run_tools != tools:
             # Mismatch found
             has_tool_mismatch = True
-            tools = set()
+            tools = None
             break
 
     # If no valid runs were processed, return empty tools
     if tools is None:
-        tools = set()
+        if not has_tool_mismatch:
+            tools = set()
 
-    return DatasetToolInfo(has_tool_mismatch=has_tool_mismatch, tools=sorted(tools))
+    return DatasetToolInfo(
+        has_tool_mismatch=has_tool_mismatch,
+        tools=None if tools is None else sorted(tools),
+    )
 
     def tool_info(self) -> DatasetToolInfo:
         """
diff --git a/libs/core/kiln_ai/datamodel/test_dataset_split.py b/libs/core/kiln_ai/datamodel/test_dataset_split.py
index b89631b15..a540625fc 100644
--- a/libs/core/kiln_ai/datamodel/test_dataset_split.py
+++ b/libs/core/kiln_ai/datamodel/test_dataset_split.py
@@ -410,12 +410,12 @@ def _create_mock_run(tools):
         (
             [["kiln_tool::add_numbers"], ["kiln_tool::multiply_numbers"]],
             True,
-            [],
+            None,
         ),
         # both runs have no tools
         ([None, None], False, []),
         # one run has tools, the other has none
-        ([["kiln_tool::add_numbers"], None], True, []),
+        ([["kiln_tool::add_numbers"], None], True, None),
         ([], False, []),
         ([["kiln_tool::add_numbers"]], False, ["kiln_tool::add_numbers"]),
         ([None], False, []),
@@ -454,4 +454,4 @@ def test_compute_tool_info_treats_missing_config_as_empty_tools(mock_run_with_to
     [run_no_source, run_with_tools, run_no_config]
     )
     assert tool_info.has_tool_mismatch is True
-    assert tool_info.tools == []
+    assert tool_info.tools is None
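
Note (not part of the patch): a minimal sketch of the three filter states this diff distinguishes when calling the `finetune_dataset_info` endpoint, written against the query parameters shown above (`tool_ids`, `empty_tool_filter`). The base URL and the IDs are placeholders, and the helper name is hypothetical; only the endpoint path and parameter names come from the diff.

import httpx

BASE_URL = "http://localhost:8757"  # placeholder; point at your running studio server


def fetch_finetune_dataset_info(
    project_id: str, task_id: str, tool_ids: list[str] | None
) -> dict:
    # None -> no filter at all; every dataset and finetune tag is eligible.
    # []   -> explicit "no tools/skills"; sent as empty_tool_filter=true because
    #         openapi-fetch omits empty arrays (per the comment in the endpoint).
    # [..] -> sent as repeated tool_ids query parameters.
    params: dict[str, object] = {}
    if tool_ids is not None:
        if len(tool_ids) == 0:
            params["empty_tool_filter"] = True
        else:
            params["tool_ids"] = tool_ids
    url = f"{BASE_URL}/api/projects/{project_id}/tasks/{task_id}/finetune_dataset_info"
    response = httpx.get(url, params=params)
    response.raise_for_status()
    return response.json()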