Skip to content

Feature: {mcptools} support #111

@zacdav-db

Description

@zacdav-db

e.g.

library(ellmer)
library(brickster)

# Helper function to extract metadata from databricks objects
extract_metadata <- function(obj, fields = NULL) {
  if (is.null(fields)) {
    fields <- intersect(
      names(obj),
      c(
        "id",
        "name",
        "owner",
        "creator_name",
        "created_by",
        "state",
        "size",
        "catalog_type",
        "full_name",
        "securable_type",
        "securable_kind",
        "updated_at",
        "created_at"
      )
    )
  }
  result <- list()
  for (field in fields) {
    if (field %in% names(obj)) {
      result[[field]] <- obj[[field]]
    }
  }
  result
}

# Helper function to serialize data to JSON
to_json_output <- function(data, auto_unbox = TRUE) {
  jsonlite::toJSON(data, auto_unbox = auto_unbox)
}


# Define a tool for LLM to query Databricks
db_query_tool <- ellmer::tool(
  .fun = function(sql, warehouse_id) {
    db_sql_query(statement = sql, warehouse_id = warehouse_id) |>
      to_json_output(auto_unbox = FALSE)
  },
  .description = "Execute SQL queries against Databricks SQL warehouses. This tool allows you to run SELECT statements, data analysis queries, and other SQL operations on Databricks compute resources.",
  sql = ellmer::type_string(
    "The SQL query to execute. Should be a valid SQL statement compatible with Databricks SQL."
  ),
  warehouse_id = ellmer::type_string(
    "The unique identifier of the Databricks SQL warehouse to run the query against."
  ),
  .annotations = ellmer::tool_annotations(
    title = "Databricks SQL Query Tool",
    read_only_hint = FALSE,
    open_world_hint = TRUE
  )
)

# Define a tool for LLM to discover available SQL warehouses
db_warehouse_discovery_tool <- ellmer::tool(
  .fun = function() {
    warehouses <- db_sql_warehouse_list()
    purrr::map(
      warehouses,
      ~ extract_metadata(.x, c("id", "name", "size", "creator_name", "state"))
    ) |>
      to_json_output()
  },
  .description = "Discover and list all available Databricks SQL warehouses in the workspace. This tool returns information about warehouse IDs, names, states, cluster sizes, and other metadata needed to select an appropriate warehouse for running SQL queries.",
  .annotations = ellmer::tool_annotations(
    title = "Databricks Warehouse Discovery Tool",
    read_only_hint = TRUE,
    open_world_hint = FALSE
  )
)

# Define a tool for LLM to list Unity Catalog catalogs
db_uc_catalogs_tool <- ellmer::tool(
  .fun = function(max_results = 50, include_browse = TRUE) {
    catalogs <- db_uc_catalogs_list(
      max_results = max_results,
      include_browse = include_browse
    )
    purrr::map(
      catalogs,
      ~ extract_metadata(.x, c("name", "owner", "created_by", "catalog_type"))
    ) |>
      to_json_output()
  },
  .description = "List all Unity Catalog catalogs available in the Databricks workspace. Returns catalog names, metadata, ownership information, and access permissions. Use this to discover the top-level data organization structure in Unity Catalog.",
  max_results = ellmer::type_integer(
    "Maximum number of catalogs to return. Default is 50."
  ),
  include_browse = ellmer::type_boolean(
    "Whether to include catalogs for which the principal can only access selective metadata. Default is TRUE."
  ),
  .annotations = ellmer::tool_annotations(
    title = "Unity Catalog Catalogs Explorer",
    read_only_hint = TRUE,
    open_world_hint = FALSE
  )
)

# Define a tool for LLM to list Unity Catalog schemas
db_uc_schemas_tool <- ellmer::tool(
  .fun = function(catalog, max_results = 50) {
    schemas <- db_uc_schemas_list(catalog = catalog, max_results = max_results)
    purrr::map(
      schemas,
      ~ extract_metadata(.x, c("full_name", "name", "owner", "created_by"))
    ) |>
      to_json_output()
  },
  .description = "List all schemas within a specific Unity Catalog catalog. Returns schema names, metadata, ownership information, and access permissions. Use this after discovering catalogs to explore the schema-level organization within a catalog.",
  catalog = ellmer::type_string(
    "The name of the parent catalog to list schemas from."
  ),
  max_results = ellmer::type_integer(
    "Maximum number of schemas to return. Default is 50."
  ),
  .annotations = ellmer::tool_annotations(
    title = "Unity Catalog Schemas Explorer",
    read_only_hint = TRUE,
    open_world_hint = FALSE
  )
)

# Define a tool for LLM to list Unity Catalog tables and their contents
db_uc_tables_tool <- ellmer::tool(
  .fun = function(
    catalog,
    schema,
    max_results = 50,
    omit_columns = FALSE,
    include_delta_metadata = FALSE
  ) {
    tables <- db_uc_tables_list(
      catalog = catalog,
      schema = schema,
      max_results = max_results,
      omit_columns = omit_columns,
      include_delta_metadata = include_delta_metadata
    )
    purrr::map(tables, function(table) {
      metadata <- extract_metadata(
        table,
        c(
          "full_name",
          "securable_type",
          "securable_kind",
          "owner",
          "created_by",
          "updated_at",
          "created_at"
        )
      )
      if (!omit_columns && !is.null(table$columns)) {
        metadata$columns <- purrr::map(
          table$columns,
          ~ list(name = .x$name, type_text = .x$type_text)
        )
      }
      metadata
    }) |>
      to_json_output()
  },
  .description = "List all tables, views, and other objects within a specific Unity Catalog schema. Returns table names, types, column information (if requested), metadata, and ownership details. Use this after discovering catalogs and schemas to explore the actual data assets.",
  catalog = ellmer::type_string(
    "The name of the parent catalog containing the schema."
  ),
  schema = ellmer::type_string("The name of the schema to list tables from."),
  max_results = ellmer::type_integer(
    "Maximum number of tables to return. Default is 50, maximum allowed is 50."
  ),
  omit_columns = ellmer::type_boolean(
    "Whether to omit column information from the response. Default is FALSE to include column details."
  ),
  include_delta_metadata = ellmer::type_boolean(
    "Whether to include Delta Lake metadata in the response. Default is FALSE."
  ),
  .annotations = ellmer::tool_annotations(
    title = "Unity Catalog Tables Explorer",
    read_only_hint = TRUE,
    open_world_hint = FALSE
  )
)

mcptools::mcp_server(
  tools = list(
    db_query_tool,
    db_uc_catalogs_tool,
    db_uc_schemas_tool,
    db_uc_tables_tool,
    db_warehouse_discovery_tool
  )
)

Metadata

Metadata

Assignees

Labels

enhancementNew feature or request

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions