From 2ebdefaa70d735ecd1d55162f9893b949db4ba3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cchrisraygill=E2=80=9D?= Date: Thu, 7 Aug 2025 12:16:34 -0400 Subject: [PATCH 1/9] Add scratch/ directory to .gitignore Ignore scratch folder used for temporary files and planning to keep the repository clean of development artifacts. --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 9bc0fe46..838740a5 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,6 @@ public/docs-bundle-experimental.json # testing folder for code samples and experiments testing/ + +# scratch folder for temporary files and planning +scratch/ From 059beeb8d454a9117a296ff39be37546abe3059f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cchrisraygill=E2=80=9D?= Date: Thu, 7 Aug 2025 15:02:41 -0400 Subject: [PATCH 2/9] Add unified documentation and language preference enhancement - Add comprehensive unified docs for flows, content generation, RAG, evaluation, dotprompt, and tool calling - Integrate language preference script for enhanced user experience - Update sidebar configuration to include new documentation sections --- src/content/custom/head.astro | 3 + .../docs/unified-docs/creating-flows.mdx | 908 +++++++++++++ src/content/docs/unified-docs/dotprompt.mdx | 1149 +++++++++++++++++ src/content/docs/unified-docs/evaluation.mdx | 909 +++++++++++++ .../docs/unified-docs/generating-content.mdx | 1084 ++++++++++++++++ src/content/docs/unified-docs/rag.mdx | 939 ++++++++++++++ .../docs/unified-docs/tool-calling.mdx | 794 ++++++++++++ src/scripts/language-preference.js | 157 +++ src/sidebar.ts | 16 +- 9 files changed, 5958 insertions(+), 1 deletion(-) create mode 100644 src/content/docs/unified-docs/creating-flows.mdx create mode 100644 src/content/docs/unified-docs/dotprompt.mdx create mode 100644 src/content/docs/unified-docs/evaluation.mdx create mode 100644 src/content/docs/unified-docs/generating-content.mdx create mode 100644 src/content/docs/unified-docs/rag.mdx create mode 100644 src/content/docs/unified-docs/tool-calling.mdx create mode 100644 src/scripts/language-preference.js diff --git a/src/content/custom/head.astro b/src/content/custom/head.astro index 4013aa93..a39be5fd 100644 --- a/src/content/custom/head.astro +++ b/src/content/custom/head.astro @@ -18,3 +18,6 @@ if (id.startsWith('docs/')) { --- {finalTitle} {head.filter(({ tag }) => tag !== 'title').map(({ tag: Tag, attrs, content }) => )} + + + diff --git a/src/content/docs/unified-docs/creating-flows.mdx b/src/content/docs/unified-docs/creating-flows.mdx new file mode 100644 index 00000000..5e6948dd --- /dev/null +++ b/src/content/docs/unified-docs/creating-flows.mdx @@ -0,0 +1,908 @@ +--- +title: Defining AI workflows +description: Learn how to define and manage AI workflows in Genkit using flows across JavaScript, Go, and Python, which provide type safety, integration with the developer UI, and simplified deployment. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +The core of your app's AI features are generative model requests, but it's rare +that you can simply take user input, pass it to the model, and display the model +output back to the user. Usually, there are pre- and post-processing steps that +must accompany the model call. 
For example: + +- Retrieving contextual information to send with the model call +- Retrieving the history of the user's current session, for example in a chat + app +- Using one model to reformat the user input in a way that's suitable to pass + to another model +- Evaluating the "safety" of a model's output before presenting it to the user +- Combining the output of several models + +Every step of this workflow must work together for any AI-related task to +succeed. + +In Genkit, you represent this tightly-linked logic using a construction called a +flow. Flows are written just like functions, using ordinary code, but +they add additional capabilities intended to ease the development of AI +features: + +- **Type safety**: Input and output schemas with runtime type checking +- **Integration with developer UI**: Debug flows independently of your + application code using the developer UI. In the developer UI, you can run + flows and view traces for each step of the flow. +- **Simplified deployment**: Deploy flows directly as web API endpoints, using + Cloud Functions for Firebase or any platform that can host a web app. + +Unlike similar features in other frameworks, Genkit's flows are lightweight and +unobtrusive, and don't force your app to conform to any specific abstraction. +All of the flow's logic is written in standard code, and code inside a +flow doesn't need to be flow-aware. + +## Defining and calling flows + +In its simplest form, a flow just wraps a function. The following example wraps +a function that calls `generate()`: + + + + ```typescript + export const menuSuggestionFlow = ai.defineFlow( + { + name: 'menuSuggestionFlow', + inputSchema: z.object({ theme: z.string() }), + outputSchema: z.object({ menuItem: z.string() }), + }, + async ({ theme }) => { + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Invent a menu item for a ${theme} themed restaurant.`, + }); + return { menuItem: text }; + }, + ); + ``` + + + ```go + menuSuggestionFlow := genkit.DefineFlow(g, "menuSuggestionFlow", + func(ctx context.Context, theme string) (string, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Invent a menu item for a %s themed restaurant.", theme), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + }) + ``` + + + ```python + @ai.flow() + async def menu_suggestion_flow(theme: str): + response = await ai.generate( + prompt=f'Invent a menu item for a {theme} themed restaurant.', + ) + return response.text + ``` + + + +Just by wrapping your `generate()` calls like this, you add some functionality: +doing so lets you run the flow from the Genkit CLI and from the developer UI, +and is a requirement for several of Genkit's features, including deployment and +observability (later sections discuss these topics). + +### Input and output schemas + +One of the most important advantages Genkit flows have over directly calling a +model API is type safety of both inputs and outputs. When defining flows, you +can define schemas for them, in much the same way as you define the +output schema of a `generate()` call; however, unlike with `generate()`, you can +also specify an input schema. + + + + While it's not mandatory to wrap your input and output schemas in `z.object()`, it's considered best practice for these reasons: + + - **Better developer experience**: Wrapping schemas in objects provides a better experience in the Developer UI by giving you labeled input fields. 
+ - **Future-proof API design**: Object-based schemas allow for easy extensibility in the future. You can add new fields to your input or output schemas without breaking existing clients, which is a core principle of robust API design. + + Here's a refinement of the last example, which defines a flow that takes a + string as input and outputs an object: + + ```typescript + import { z } from 'genkit'; + + const MenuItemSchema = z.object({ + dishname: z.string(), + description: z.string(), + }); + + export const menuSuggestionFlowWithSchema = ai.defineFlow( + { + name: 'menuSuggestionFlow', + inputSchema: z.object({ theme: z.string() }), + outputSchema: MenuItemSchema, + }, + async ({ theme }) => { + const { output } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Invent a menu item for a ${theme} themed restaurant.`, + output: { schema: MenuItemSchema }, + }); + if (output == null) { + throw new Error("Response doesn't satisfy schema."); + } + return output; + }, + ); + ``` + + Model output schemas are specified using the [Zod](https://zod.dev/) + library. In addition to a schema definition language, Zod also provides runtime + type checking, which bridges the gap between static TypeScript types and the + unpredictable output of generative AI models. + + + Here's a refinement of the last example, which defines a flow that takes a + string as input and outputs an object: + + ```go + type MenuItem struct { + Name string `json:"name"` + Description string `json:"description"` + } + + menuSuggestionFlow := genkit.DefineFlow(g, "menuSuggestionFlow", + func(ctx context.Context, theme string) (MenuItem, error) { + return genkit.GenerateData[MenuItem](ctx, g, + ai.WithPrompt("Invent a menu item for a %s themed restaurant.", theme), + ) + }) + ``` + + Model output types are specified as JSON schema using the + [`invopop/jsonschema`](https://github.com/invopop/jsonschema) package. This + provides runtime type checking, which bridges the gap between static Go types + and the unpredictable output of generative AI models. + + + Here's a refinement of the last example, which defines a flow that takes a + string as input and outputs an object: + + ```python + from pydantic import BaseModel + + class MenuItemSchema(BaseModel): + dishname: str + description: str + + @ai.flow() + async def menu_suggestion_flow(theme: str) -> MenuItemSchema: + response = await ai.generate( + prompt=f'Invent a menu item for a {theme} themed restaurant.', + output_schema=MenuItemSchema, + ) + return response.output + ``` + + Model output schemas are specified using [Pydantic Models](https://docs.pydantic.dev/latest/concepts/models/). In addition to a schema definition language, Pydantic also provides runtime + type checking, which bridges the gap between static Python types and the + unpredictable output of generative AI models. + + + +Note that the schema of a flow does not necessarily have to line up with the +schema of the `generate()` calls within the flow (in fact, a flow might not even +contain `generate()` calls). Here's a variation of the example that passes a +schema to `generate()`, but uses the structured output to format a simple +string, which the flow returns. 
+ + + + ```typescript + export const menuSuggestionFlowMarkdown = ai.defineFlow( + { + name: 'menuSuggestionFlow', + inputSchema: z.object({ theme: z.string() }), + outputSchema: z.object({ formattedMenuItem: z.string() }), + }, + async ({ theme }) => { + const { output } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Invent a menu item for a ${theme} themed restaurant.`, + output: { schema: MenuItemSchema }, + }); + if (output == null) { + throw new Error("Response doesn't satisfy schema."); + } + return { + formattedMenuItem: `**${output.dishname}**: ${output.description}` + }; + }, + ); + ``` + + + ```go + type MenuItem struct { + Name string `json:"name"` + Description string `json:"description"` + } + + menuSuggestionMarkdownFlow := genkit.DefineFlow(g, "menuSuggestionMarkdownFlow", + func(ctx context.Context, theme string) (string, error) { + item, _, err := genkit.GenerateData[MenuItem](ctx, g, + ai.WithPrompt("Invent a menu item for a %s themed restaurant.", theme), + ) + if err != nil { + return "", err + } + + return fmt.Sprintf("**%s**: %s", item.Name, item.Description), nil + }) + ``` + + + ```python + @ai.flow() + async def menu_suggestion_flow(theme: str) -> str: # Changed return type annotation + response = await ai.generate( + prompt=f'Invent a menu item for a {theme} themed restaurant.', + output_schema=MenuItemSchema, + ) + output: MenuItemSchema = response.output + return f'**{output.dishname}**: {output.description}' + ``` + + + +### Calling flows + +Once you've defined a flow, you can call it from your code: + + + + ```typescript + const { text } = await menuSuggestionFlow({ theme: 'bistro' }); + ``` + + The argument to the flow must conform to the input schema, if you defined one. + + If you defined an output schema, the flow response will conform to it. For + example, if you set the output schema to `MenuItemSchema`, the flow output will + contain its properties: + + ```typescript + const { dishname, description } = await menuSuggestionFlowWithSchema({ theme: 'bistro' }); + ``` + + + ```go + item, err := menuSuggestionFlow.Run(context.Background(), "bistro") + ``` + + The argument to the flow must conform to the input schema. + + If you defined an output schema, the flow response will conform to it. For + example, if you set the output schema to `MenuItem`, the flow output will + contain its properties: + + ```go + item, err := menuSuggestionFlow.Run(context.Background(), "bistro") + if err != nil { + log.Fatal(err) + } + + log.Println(item.Name) + log.Println(item.Description) + ``` + + + ```python + response = await menu_suggestion_flow('bistro') + ``` + + The argument to the flow must conform to the input schema, if you defined one. + + If you defined an output schema, the flow response will conform to it. For + example, if you set the output schema to `MenuItemSchema`, the flow output will + contain its properties. + + + +## Streaming flows + +Flows support streaming using an interface similar to `generate()`'s streaming +interface. Streaming is useful when your flow generates a large amount of +output, because you can present the output to the user as it's being generated, +which improves the perceived responsiveness of your app. As a familiar example, +chat-based LLM interfaces often stream their responses to the user as they are +generated. 
Here's an example of a flow that supports streaming:

  ```typescript
  export const menuSuggestionStreamingFlow = ai.defineFlow(
    {
      name: 'menuSuggestionFlow',
      inputSchema: z.object({ theme: z.string() }),
      streamSchema: z.string(),
      outputSchema: z.object({ theme: z.string(), menuItem: z.string() }),
    },
    async ({ theme }, { sendChunk }) => {
      const { stream, response } = ai.generateStream({
        model: googleAI.model('gemini-2.5-flash'),
        prompt: `Invent a menu item for a ${theme} themed restaurant.`,
      });

      for await (const chunk of stream) {
        // Here, you could process the chunk in some way before sending it to
        // the output stream via sendChunk(). In this example, we output
        // the text of the chunk, unmodified.
        sendChunk(chunk.text);
      }

      const { text: menuItem } = await response;

      return {
        theme,
        menuItem,
      };
    },
  );
  ```

  - The `streamSchema` option specifies the type of values your flow streams.
    This does not necessarily need to be the same type as the `outputSchema`,
    which is the type of the flow's complete output.
  - The second parameter to your flow definition is called the "side channel". It
    provides features such as request context and the `sendChunk` callback.
    The `sendChunk` callback takes a single parameter, of
    the type specified by `streamSchema`. Whenever data becomes available within
    your flow, send the data to the output stream by calling this function.

  ```go
  type Menu struct {
    Theme string     `json:"theme"`
    Items []MenuItem `json:"items"`
  }

  type MenuItem struct {
    Name        string `json:"name"`
    Description string `json:"description"`
  }

  menuSuggestionFlow := genkit.DefineStreamingFlow(g, "menuSuggestionFlow",
    func(ctx context.Context, theme string, callback core.StreamCallback[string]) (Menu, error) {
      item, _, err := genkit.GenerateData[MenuItem](ctx, g,
        ai.WithPrompt("Invent a menu item for a %s themed restaurant.", theme),
        ai.WithStreaming(func(ctx context.Context, chunk *ai.ModelResponseChunk) error {
          // Here, you could process the chunk in some way before sending it to
          // the output stream using StreamCallback. In this example, we output
          // the text of the chunk, unmodified.
          return callback(ctx, chunk.Text())
        }),
      )
      if err != nil {
        return Menu{}, err
      }

      return Menu{
        Theme: theme,
        Items: []MenuItem{*item},
      }, nil
    })
  ```

  The `string` type in `StreamCallback[string]` specifies the type of
  values your flow streams. This does not necessarily need to be the same
  type as the return type, which is the type of the flow's complete output
  (`Menu` in this example).

  ```python
  @ai.flow()
  async def menu_suggestion_flow(theme: str, ctx):
      stream, response = ai.generate_stream(
          prompt=f'Invent a menu item for a {theme} themed restaurant.',
      )

      async for chunk in stream:
          ctx.send_chunk(chunk.text)

      return {
          'theme': theme,
          'menu_item': (await response).text,
      }
  ```

  The second parameter to your flow definition is called the "side channel". It
  provides features such as request context and the `send_chunk` callback.
  The `send_chunk` callback takes a single parameter. Whenever data becomes
  available within your flow, send the data to the output stream by calling
  this function.

In the above examples, the values streamed by the flow are directly coupled to
the values streamed by the `generate()` call inside the flow.
Although this is
often the case, it doesn't have to be: you can output values to the stream using
the callback as often as is useful for your flow.

### Calling streaming flows

Streaming flows are also callable, but they immediately return a response object
rather than a promise:

  ```typescript
  const response = menuSuggestionStreamingFlow.stream({ theme: 'Danube' });
  ```

  The response object has a stream property, which you can use to iterate over the
  streaming output of the flow as it's generated:

  ```typescript
  for await (const chunk of response.stream) {
    console.log('chunk', chunk);
  }
  ```

  You can also get the complete output of the flow, as you can with a
  non-streaming flow:

  ```typescript
  const output = await response.output;
  ```

  Note that the streaming output of a flow might not be the same type as the
  complete output; the streaming output conforms to `streamSchema`, whereas the
  complete output conforms to `outputSchema`.

  Streaming flows can be run like non-streaming flows with
  `menuSuggestionFlow.Run(ctx, "bistro")` or they can be streamed:

  ```go
  streamCh, err := menuSuggestionFlow.Stream(context.Background(), "bistro")
  if err != nil {
    log.Fatal(err)
  }

  for result := range streamCh {
    if result.Err != nil {
      log.Fatalf("Stream error: %v", result.Err)
    }
    if result.Done {
      log.Printf("Menu with %s theme:\n", result.Output.Theme)
      for _, item := range result.Output.Items {
        log.Printf(" - %s: %s\n", item.Name, item.Description)
      }
    } else {
      log.Println("Stream chunk:", result.Stream)
    }
  }
  ```

  ```python
  stream, response = menu_suggestion_flow.stream('bistro')
  async for chunk in stream:
      print(chunk)
  ```

  You can also get the complete output of the flow, as you can with a
  non-streaming flow. The final response is a future that you can `await` on.

  ```python
  print(await response)
  ```

  Note that the streaming output of a flow might not be the same type as the
  complete output.

## Running flows from the command line

You can run flows from the command line using the Genkit CLI tool. The command format varies slightly based on your flow's input schema:

```bash
# For flows with object input (JavaScript style)
genkit flow:run menuSuggestionFlow '{"theme": "French"}'

# For flows with simple input (Go/Python style)
genkit flow:run menuSuggestionFlow '"French"'

# For streaming flows, add the -s flag
genkit flow:run menuSuggestionFlow '{"theme": "French"}' -s
```

The exact command depends on how you defined your flow's input schema:
- **Object input** (JavaScript `z.object({ theme: z.string() })`): Use `'{"theme": "French"}'`
- **Simple input** (Go `string`, Python `str`): Use `'"French"'`

Running a flow from the command line is useful for testing a flow, or for
running flows that perform tasks needed on an ad hoc basis—for example, to
run a flow that ingests a document into your vector database.

## Debugging flows

One of the advantages of encapsulating AI logic within a flow is that you can
test and debug the flow independently from your app using the Genkit developer
UI.

To start the developer UI, run the following commands from your project
directory:

  ```bash
  genkit start -- tsx --watch src/your-code.ts
  ```

  ```bash
  genkit start -- go run .
  ```

  The developer UI relies on the Go app continuing to run, even if the logic has
  completed.
If you are just getting started and Genkit is not part of a broader
  app, add `select {}` as the last line of `main()` to prevent the app from
  shutting down so that you can inspect it in the UI.

  ```bash
  genkit start -- python app.py
  ```

  Update `python app.py` to match the way you normally run your app.

From the **Run** tab of the developer UI, you can run any of the flows defined in
your project:

![Genkit DevUI flows](../../../assets/devui-flows.png)

After you've run a flow, you can inspect a trace of the flow invocation by
either clicking **View trace** or looking on the **Inspect** tab.

In the trace viewer, you can see details about the execution of the entire flow,
as well as details for each of the individual steps within the flow. For
example, consider the following flow, which contains several generation
requests:

  ```typescript
  const PrixFixeMenuSchema = z.object({
    starter: z.string(),
    soup: z.string(),
    main: z.string(),
    dessert: z.string(),
  });

  export const complexMenuSuggestionFlow = ai.defineFlow(
    {
      name: 'complexMenuSuggestionFlow',
      inputSchema: z.object({ theme: z.string() }),
      outputSchema: PrixFixeMenuSchema,
    },
    async ({ theme }): Promise<z.infer<typeof PrixFixeMenuSchema>> => {
      const chat = ai.chat({ model: googleAI.model('gemini-2.5-flash') });
      await chat.send('What makes a good prix fixe menu?');
      await chat.send(
        'What are some ingredients, seasonings, and cooking techniques that ' +
          `would work for a ${theme} themed menu?`,
      );
      const { output } = await chat.send({
        prompt:
          `Based on our discussion, invent a prix fixe menu for a ${theme} ` +
          'themed restaurant.',
        output: {
          schema: PrixFixeMenuSchema,
        },
      });
      if (!output) {
        throw new Error('No data generated.');
      }
      return output;
    },
  );
  ```

  ```go
  type PrixFixeMenu struct {
    Starter string `json:"starter"`
    Soup    string `json:"soup"`
    Main    string `json:"main"`
    Dessert string `json:"dessert"`
  }

  complexMenuSuggestionFlow := genkit.DefineFlow(g, "complexMenuSuggestionFlow",
    func(ctx context.Context, theme string) (PrixFixeMenu, error) {
      // Multiple generation requests would go here
      // This is a simplified example
      menu, _, err := genkit.GenerateData[PrixFixeMenu](ctx, g,
        ai.WithPrompt("Create a prix fixe menu for a %s themed restaurant.", theme),
      )
      if err != nil {
        return PrixFixeMenu{}, err
      }
      return *menu, nil
    })
  ```

  ```python
  class PrixFixeMenuSchema(BaseModel):
      starter: str
      soup: str
      main: str
      dessert: str

  @ai.flow()
  async def complex_menu_suggestion_flow(theme: str) -> PrixFixeMenuSchema:
      # Multiple generation requests would go here
      # This is a simplified example
      response = await ai.generate(
          prompt=f'Create a prix fixe menu for a {theme} themed restaurant.',
          output_schema=PrixFixeMenuSchema,
      )
      return response.output
  ```

When you run this flow, the trace viewer shows you details about each generation
request including its output:

![Genkit DevUI flows](../../../assets/devui-inspect.png)

### Flow steps

In the last example, you saw that each `generate()` call showed up as a separate
step in the trace viewer. Each of Genkit's fundamental actions shows up as a
separate step of a flow:

- `generate()`
- `Chat.send()`
- `embed()`
- `index()`
- `retrieve()`

If you want to include code other than the above in your traces, you can do so
by wrapping the code in a `run()` call. You might do this for calls to
third-party libraries that are not Genkit-aware, or for any critical section of
code.
For example, here's a flow with two steps: the first step retrieves a menu using
  some unspecified method, and the second step includes the menu as context for a
  `generate()` call.

  ```ts
  export const menuQuestionFlow = ai.defineFlow(
    {
      name: 'menuQuestionFlow',
      inputSchema: z.object({ question: z.string() }),
      outputSchema: z.object({ answer: z.string() }),
    },
    async ({ question }): Promise<{ answer: string }> => {
      const menu = await ai.run('retrieve-daily-menu', async (): Promise<string> => {
        // Retrieve today's menu. (This could be a database access or simply
        // fetching the menu from your website.)

        // ...

        return menu;
      });
      const { text } = await ai.generate({
        model: googleAI.model('gemini-2.5-flash'),
        system: "Help the user answer questions about today's menu.",
        prompt: question,
        docs: [{ content: [{ text: menu }] }],
      });
      return { answer: text };
    },
  );
  ```

  Because the retrieval step is wrapped in a `run()` call, it's included as a step
  in the trace viewer:

  ![Genkit DevUI flows](../../../assets/devui-runstep.png)

  ```go
  // Similar run() functionality available in Go
  // Check the Go documentation for specific syntax
  ```

  ```python
  # Similar run() functionality available in Python
  # Check the Python documentation for specific syntax
  ```

## Deploying flows

You can deploy your flows directly as web API endpoints, ready for you to call
from your app clients. Deployment is discussed in detail on several other pages,
but this section gives brief overviews of your deployment options.

### Cloud Functions for Firebase

  To deploy flows with Cloud Functions for Firebase, use the `onCallGenkit`
  feature of `firebase-functions/https`. `onCallGenkit` wraps your flow in a
  callable function. You may set an auth policy and configure App Check.

  ```typescript
  import { hasClaim, onCallGenkit } from 'firebase-functions/https';
  import { defineSecret } from 'firebase-functions/params';

  const apiKey = defineSecret('GOOGLE_AI_API_KEY');

  const menuSuggestionFlow = ai.defineFlow(
    {
      name: 'menuSuggestionFlow',
      inputSchema: z.object({ theme: z.string() }),
      outputSchema: z.object({ menuItem: z.string() }),
    },
    async ({ theme }) => {
      // ...
      return { menuItem: "Generated menu item would go here" };
    },
  );

  export const menuSuggestion = onCallGenkit(
    {
      secrets: [apiKey],
      authPolicy: hasClaim('email_verified'),
    },
    menuSuggestionFlow,
  );
  ```

  For more information, see the following pages:

  - [Deploy with Firebase](/docs/firebase)
  - [Authorization and integrity](/docs/auth#authorize-using-cloud-functions-for-firebase)
  - [Firebase plugin](/docs/plugins/firebase)

  Cloud Functions for Firebase support for Go is not currently available. Use the HTTP server deployment method instead.

  Cloud Functions for Firebase support for Python is not currently available. Use the Flask/HTTP server deployment method instead.

### HTTP Server Deployment

  To deploy flows using any Node.js hosting platform, such as Cloud Run, define
  your flows using `defineFlow()` and then call `startFlowServer()`:

  ```typescript
  import { startFlowServer } from '@genkit-ai/express';

  export const menuSuggestionFlow = ai.defineFlow(
    {
      name: 'menuSuggestionFlow',
      inputSchema: z.object({ theme: z.string() }),
      outputSchema: z.object({ result: z.string() }),
    },
    async ({ theme }) => {
      // ...
+ }, + ); + + startFlowServer({ + flows: [menuSuggestionFlow], + }); + ``` + + By default, `startFlowServer` will serve all the flows defined in your codebase + as HTTP endpoints (for example, `http://localhost:3400/menuSuggestionFlow`). You + can call a flow with a POST request as follows: + + ```bash + curl -X POST "http://localhost:3400/menuSuggestionFlow" \ + -H "Content-Type: application/json" -d '{"data": {"theme": "banana"}}' + ``` + + For information on deploying to specific platforms, see + [Deploy with Cloud Run](/docs/cloud-run) and + [Deploy flows to any Node.js platform](/docs/deploy-node). + + + To deploy a flow using any Go hosting platform, such as Cloud Run, define + your flow using `genkit.DefineFlow()` and start a `net/http` server with the + provided flow handler using `genkit.Handler()`: + + ```go + package main + + import ( + "context" + "log" + "net/http" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + "github.com/firebase/genkit/go/plugins/server" + ) + + type MenuItem struct { + Name string `json:"name"` + Description string `json:"description"` + } + + func main() { + ctx := context.Background() + + g, err := genkit.Init(ctx, genkit.WithPlugins(&googlegenai.GoogleAI{})) + if err != nil { + log.Fatal(err) + } + + menuSuggestionFlow := genkit.DefineFlow(g, "menuSuggestionFlow", + func(ctx context.Context, theme string) (MenuItem, error) { + item, _, err := genkit.GenerateData[MenuItem](ctx, g, + ai.WithPrompt("Invent a menu item for a %s themed restaurant.", theme), + ) + return item, err + }) + + mux := http.NewServeMux() + mux.HandleFunc("POST /menuSuggestionFlow", genkit.Handler(menuSuggestionFlow)) + log.Fatal(server.Start(ctx, "127.0.0.1:3400", mux)) + } + ``` + + You can call a flow endpoint with a POST request as follows: + + ```bash + curl -X POST "http://localhost:3400/menuSuggestionFlow" \ + -H "Content-Type: application/json" -d '{"data": "banana"}' + ``` + + For information on deploying to specific platforms, see + [Genkit with Cloud Run](/go/docs/cloud-run). + + + For Python deployment options, see: + - [Deploy with Cloud Run](/python/docs/cloud-run) + - [Deploy with Flask](/python/docs/flask) + + ```python + # Example Flask deployment + from flask import Flask + from genkit.ai import Genkit + + app = Flask(__name__) + ai = Genkit() + + @ai.flow() + async def menu_suggestion_flow(theme: str): + response = await ai.generate( + prompt=f'Invent a menu item for a {theme} themed restaurant.', + ) + return response.text + + # Flask route integration would go here + # See the Flask deployment guide for complete setup + ``` + + diff --git a/src/content/docs/unified-docs/dotprompt.mdx b/src/content/docs/unified-docs/dotprompt.mdx new file mode 100644 index 00000000..036fd15e --- /dev/null +++ b/src/content/docs/unified-docs/dotprompt.mdx @@ -0,0 +1,1149 @@ +--- +title: Managing prompts with Dotprompt +description: Learn how to use Dotprompt to manage prompts, models, and parameters for generative AI models across JavaScript and Go, with a streamlined approach to prompt engineering and iteration. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Prompt engineering is the primary way that you, as an app developer, influence +the output of generative AI models. For example, when using LLMs, you can craft +prompts that influence the tone, format, length, and other characteristics of +the models' responses. 
The way you write these prompts will depend on the model you're using; a prompt
written for one model might not perform well when used with another model.
Similarly, the model parameters you set (temperature, top-k, and so on) will
also affect output differently depending on the model.

Getting all three of these factors—the model, the model parameters, and
the prompt—working together to produce the output you want is rarely a
trivial process and often involves substantial iteration and experimentation.
Genkit provides a library and file format called Dotprompt that aims to make
this iteration faster and more convenient.

[Dotprompt](https://github.com/google/dotprompt) is designed around the premise
that **prompts are code**. You define your prompts along with the models and
model parameters they're intended for separately from your application code.
Then, you (or perhaps someone not even involved with writing application code)
can rapidly iterate on the prompts and model parameters using the Genkit
Developer UI. Once your prompts are working the way you want, you can import
them into your application and run them using Genkit.

Your prompt definitions each go in a file with a `.prompt` extension. Here's an
example of what these files look like:

```dotprompt
---
model: googleai/gemini-2.5-flash
config:
  temperature: 0.9
input:
  schema:
    location: string
    style?: string
    name?: string
  default:
    location: a restaurant
---

You are the world's most welcoming AI assistant and are currently working at {{location}}.

Greet a guest{{#if name}} named {{name}}{{/if}}{{#if style}} in the style of {{style}}{{/if}}.
```

The portion in the triple-dashes is YAML front matter, similar to the front
matter format used by GitHub Markdown and Jekyll; the rest of the file is the
prompt, which can optionally use Handlebars
templates. The following sections will go into more detail about each of
the parts that make up a `.prompt` file and how to use them.

## Before you begin

Before reading this page, you should be familiar with the content covered on the
[Generating content with AI models](/unified-docs/generating-content) page.

If you want to run the code examples on this page, first complete the steps in
the Getting started guide for your language:

  Complete the [Get started](/docs/get-started) guide. All examples assume you have already installed Genkit as a dependency in your project.

  Complete the [Get started](/go/docs/get-started-go) guide. All examples assume you have already installed Genkit as a dependency in your project.

  Dotprompt is not currently available for Python. You can define prompts directly in your Python code using the standard prompt patterns shown in the [Generating content](/unified-docs/generating-content) guide.

## Creating prompt files

Although Dotprompt provides several [different ways](#defining-prompts-in-code) to create
and load prompts, it's optimized for projects that organize their prompts as
`.prompt` files within a single directory (or subdirectories thereof). This
section shows you how to create and load prompts using this recommended setup.

### Creating a prompt directory

The Dotprompt library expects to find your prompts in a directory at your
project root and automatically loads any prompts it finds there. By default,
this directory is named `prompts`.
For example, using the default directory
name, your project structure might look something like this:

  ```
  your-project/
  ├── lib/
  ├── node_modules/
  ├── prompts/
  │   └── hello.prompt
  ├── src/
  ├── package-lock.json
  ├── package.json
  └── tsconfig.json
  ```

  ```
  your-project/
  ├── prompts/
  │   └── hello.prompt
  ├── main.go
  ├── go.mod
  └── go.sum
  ```

  Dotprompt is not currently available for Python. Use standard prompt definition patterns in your Python code.

If you want to use a different directory, you can specify it when you configure
Genkit:

  ```ts
  const ai = genkit({
    promptDir: './llm_prompts',
    // (Other settings...)
  });
  ```

  ```go
  g, err := genkit.Init(context.Background(), genkit.WithPromptDir("./llm_prompts"))
  ```

  Not applicable - Dotprompt is not available for Python.

### Creating a prompt file

There are two ways to create a `.prompt` file: using a text editor, or with the
developer UI.

#### Using a text editor

If you want to create a prompt file using a text editor, create a text file with
the `.prompt` extension in your prompts directory: for example,
`prompts/hello.prompt`.

Here is a minimal example of a prompt file:

```dotprompt
---
model: googleai/gemini-2.5-flash
---
You are the world's most welcoming AI assistant. Greet the user and offer your assistance.
```

The portion in the dashes is YAML front matter, similar to the front matter
format used by GitHub markdown and Jekyll; the rest of the file is the prompt,
which can optionally use Handlebars templates. The front matter section is
optional, but most prompt files will at least contain metadata specifying a
model. The remainder of this page shows you how to go beyond this, and make use
of Dotprompt's features in your prompt files.

#### Using the developer UI

You can also create a prompt file using the model runner in the developer UI.
Start with application code that imports the Genkit library and configures it to
use the model plugin you're interested in:

  ```ts
  import { genkit } from 'genkit';

  // Import the model plugins you want to use.
  import { googleAI } from '@genkit-ai/googleai';

  const ai = genkit({
    // Initialize and configure the model plugins.
    plugins: [
      googleAI({
        apiKey: 'your-api-key', // Or (preferred): export GEMINI_API_KEY=...
      }),
    ],
  });
  ```

  It's okay if the file contains other code, but the above is all that's required.

  Load the developer UI in the same project:

  ```bash
  genkit start -- tsx --watch src/your-code.ts
  ```

  ```go
  package main

  import (
    "context"
    "log"

    "github.com/firebase/genkit/go/genkit"
    "github.com/firebase/genkit/go/plugins/googlegenai"
  )

  func main() {
    g, err := genkit.Init(context.Background(), genkit.WithPlugins(&googlegenai.GoogleAI{}))
    if err != nil {
      log.Fatal(err)
    }
    _ = g // Use g to define prompts, flows, and so on.

    // Blocks end of program execution to use the developer UI.
    select {}
  }
  ```

  Load the developer UI in the same project:

  ```bash
  genkit start -- go run .
  ```

  Dotprompt is not currently available for Python. Use the standard Genkit Python patterns for prompt definition.

In the Models section, choose the model you want to use from the list of models
provided by the plugin.

Then, experiment with the prompt and configuration until you get results you're
happy with. When you're ready, press the Export button and save the file to your
prompts directory.
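The examples in the next section pass a `name` value as input to the `hello` prompt. For those calls to work, the prompt file needs a matching input schema; here's a minimal sketch of what such a `hello.prompt` might look like (the greeting wording is illustrative):

```dotprompt
---
model: googleai/gemini-2.5-flash
input:
  schema:
    name: string
---
You are the world's most welcoming AI assistant. Greet a user named {{name}}.
```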
## Running prompts

After you've created prompt files, you can run them from your application code,
or using the tooling provided by Genkit. Regardless of how you want to run your
prompts, first start with application code that imports the Genkit library and
the model plugins you're interested in.

If you're storing your prompts in a directory other than the default, be sure to
specify it when you configure Genkit.

### Run prompts from code

  To use a prompt, first load it using the `prompt('file_name')` method:

  ```ts
  const helloPrompt = ai.prompt('hello');
  ```

  Once loaded, you can call the prompt like a function:

  ```ts
  const response = await helloPrompt();

  // Alternatively, use destructuring assignments to get only the properties
  // you're interested in:
  const { text } = await helloPrompt();
  ```

  Or you can also run the prompt in streaming mode:

  ```ts
  const { response, stream } = helloPrompt.stream();

  for await (const chunk of stream) {
    console.log(chunk.text);
  }
  // optional final (aggregated) response
  console.log((await response).text);
  ```

  A callable prompt takes two optional parameters: the input to the prompt (see
  the section below on [specifying input schemas](#input-and-output-schemas)), and a configuration
  object, similar to that of the `generate()` method. For example:

  ```ts
  const response2 = await helloPrompt(
    // Prompt input:
    { name: 'Ted' },

    // Generation options:
    {
      config: {
        temperature: 0.4,
      },
    },
  );
  ```

  Similarly for streaming:

  ```ts
  const { stream } = helloPrompt.stream(input, options);
  ```

  Any parameters you pass to the prompt call will override the same parameters
  specified in the prompt file.

  See [Generate content with AI models](/unified-docs/generating-content) for descriptions of the available
  options.

  To use a prompt, first load it using the `genkit.LookupPrompt()` function:

  ```go
  helloPrompt := genkit.LookupPrompt(g, "hello")
  ```

  An executable prompt has similar options to that of `genkit.Generate()` and many
  of them are overridable at execution time, including things like input (see the
  section about [specifying input schemas](#input-and-output-schemas)), configuration, and more:

  ```go
  resp, err := helloPrompt.Execute(context.Background(),
    ai.WithModelName("googleai/gemini-2.5-flash"),
    ai.WithInput(map[string]any{"name": "John"}),
    ai.WithConfig(&googlegenai.GeminiConfig{Temperature: 0.5}),
  )
  ```

  Any parameters you pass to the prompt call will override the same parameters
  specified in the prompt file.

  See [Generate content with AI models](/unified-docs/generating-content) for descriptions of the available
  options.

  Dotprompt is not currently available for Python. Use the standard prompt patterns:

  ```python
  # Define prompts directly in your code
  response = await ai.generate(
      prompt="You are a helpful assistant. Greet the user.",
      model="googleai/gemini-2.5-flash"
  )
  ```

### Using the developer UI

As you're refining your app's prompts, you can run them in the Genkit developer
UI to quickly iterate on prompts and model configurations, independently from
your application code.

  Load the developer UI from your project directory:

  ```bash
  genkit start -- tsx --watch src/your-code.ts
  ```

  Load the developer UI from your project directory:

  ```bash
  genkit start -- go run .
+ ``` + + + Dotprompt is not available for Python, but you can still use the developer UI to test prompts defined in your code. + + + +Once you've loaded prompts into the developer UI, you can run them with +different input values, and experiment with how changes to the prompt wording or +the configuration parameters affect the model output. When you're happy with the +result, you can click the **Export prompt** button to save the modified prompt +back into your project directory. + +## Model configuration + +In the front matter block of your prompt files, you can optionally specify model +configuration values for your prompt: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +config: + temperature: 1.4 + topK: 50 + topP: 0.4 + maxOutputTokens: 400 + stopSequences: + - "" + - "" +--- +``` + +These values map directly to the configuration parameters: + + + + ```ts + const response3 = await helloPrompt( + {}, + { + config: { + temperature: 1.4, + topK: 50, + topP: 0.4, + maxOutputTokens: 400, + stopSequences: ['', ''], + }, + }, + ); + ``` + + + ```go + resp, err := helloPrompt.Execute(context.Background(), + ai.WithConfig(&googlegenai.GeminiConfig{ + Temperature: 1.4, + TopK: 50, + TopP: 0.4, + MaxOutputTokens: 400, + StopSequences: []string{"", ""}, + })) + ``` + + + Not applicable - use standard configuration patterns in Python code. + + + +See [Generate content with AI models](/unified-docs/generating-content) for descriptions of the available +options. + +## Input and output schemas + +You can specify input and output schemas for your prompt by defining them in the +front matter section: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + theme?: string + default: + theme: "pirate" +output: + schema: + dishname: string + description: string + calories: integer + allergens(array): string +--- +Invent a menu item for a {{theme}} themed restaurant. +``` + +These schemas are used in much the same way as those passed to a `generate()` +request or a flow definition. For example, the prompt defined above produces +structured output: + + + + ```ts + const menuPrompt = ai.prompt('menu'); + const { output } = await menuPrompt({ theme: 'medieval' }); + + const dishName = output['dishname']; + const description = output['description']; + ``` + + + ```go + menuPrompt := genkit.LookupPrompt(g, "menu") + if menuPrompt == nil { + log.Fatal("no prompt named 'menu' found") + } + + resp, err := menuPrompt.Execute(ctx, + ai.WithInput(map[string]any{"theme": "medieval"}), + ) + if err != nil { + log.Fatal(err) + } + + var output map[string]any + if err := resp.Output(&output); err != nil { + log.Fatal(err) + } + + log.Println(output["dishname"]) + log.Println(output["description"]) + ``` + + + Not applicable - use standard schema patterns in Python code. + + + +You have several options for defining schemas in a `.prompt` file: Dotprompt's +own schema definition format, Picoschema; standard JSON Schema; or, as +references to schemas defined in your application code. The following sections +describe each of these options in more detail. + +### Picoschema + +The schemas in the example above are defined in a format called Picoschema. +Picoschema is a compact, YAML-optimized schema definition format that makes it +easy to define the most important attributes of a schema for LLM usage. 
Here's a +longer example of a schema, which specifies the information an app might store +about an article: + +```yaml +schema: + title: string # string, number, and boolean types are defined like this + subtitle?: string # optional fields are marked with a `?` + draft?: boolean, true when in draft state + status?(enum, approval status): [PENDING, APPROVED] + date: string, the date of publication e.g. '2024-04-09' # descriptions follow a comma + tags(array, relevant tags for article): string # arrays are denoted via parentheses + authors(array): + name: string + email?: string + metadata?(object): # objects are also denoted via parentheses + updatedAt?: string, ISO timestamp of last update + approvedBy?: integer, id of approver + extra?: any, arbitrary extra data + (*): string, wildcard field +``` + +The above schema is equivalent to the following type definitions: + + + + ```ts + interface Article { + title: string; + subtitle?: string | null; + /** true when in draft state */ + draft?: boolean | null; + /** approval status */ + status?: 'PENDING' | 'APPROVED' | null; + /** the date of publication e.g. '2024-04-09' */ + date: string; + /** relevant tags for article */ + tags: string[]; + authors: { + name: string; + email?: string | null; + }[]; + metadata?: { + /** ISO timestamp of last update */ + updatedAt?: string | null; + /** id of approver */ + approvedBy?: number | null; + } | null; + /** arbitrary extra data */ + extra?: any; + /** wildcard field */ + [key: string]: any; + } + ``` + + + ```go + type Article struct { + Title string `json:"title"` + Subtitle string `json:"subtitle,omitempty" jsonschema:"required=false"` + Draft bool `json:"draft,omitempty"` // True when in draft state + Status string `json:"status,omitempty" jsonschema:"enum=PENDING,enum=APPROVED"` // Approval status + Date string `json:"date"` // The date of publication e.g. '2025-04-07' + Tags []string `json:"tags"` // Relevant tags for article + Authors []struct { + Name string `json:"name"` + Email string `json:"email,omitempty"` + } `json:"authors"` + Metadata struct { + UpdatedAt string `json:"updatedAt,omitempty"` // ISO timestamp of last update + ApprovedBy int `json:"approvedBy,omitempty"` // ID of approver + } `json:"metadata,omitempty"` + Extra any `json:"extra"` // Arbitrary extra data + } + ``` + + + Not applicable - use standard schema patterns in Python code. + + + +Picoschema supports scalar types `string`, `integer`, `number`, `boolean`, and +`any`. Objects, arrays, and enums are denoted by a parenthetical after the field +name. + +Objects defined by Picoschema have all properties required unless denoted +optional by `?`, and do not allow additional properties. When a property is +marked as optional, it is also made nullable to provide more leniency for LLMs +to return null instead of omitting a field. + +In an object definition, the special key `(*)` can be used to declare a +"wildcard" field definition. This will match any additional properties not +supplied by an explicit key. + +### JSON Schema + +Picoschema does not support many of the capabilities of full JSON schema. If you +require more robust schemas, you may supply a JSON Schema instead: + +```yaml +output: + schema: + type: object + properties: + field1: + type: number + minimum: 20 +``` + +### Schema references defined in code + + + + In addition to directly defining schemas in the `.prompt` file, you can + reference a schema registered with `defineSchema()` by name. 
If you're using + TypeScript, this approach will let you take advantage of the language's static + type checking features when you work with prompts. + + To register a schema using Zod: + + ```ts + import { z } from 'genkit'; + + const MenuItemSchema = ai.defineSchema( + 'MenuItemSchema', + z.object({ + dishname: z.string(), + description: z.string(), + calories: z.coerce.number(), + allergens: z.array(z.string()), + }), + ); + ``` + + Within your prompt, provide the name of the registered schema: + + ```dotprompt + --- + model: googleai/gemini-2.5-flash-latest + output: + schema: MenuItemSchema + --- + ``` + + The Dotprompt library will automatically resolve the name to the underlying + registered schema. You can then utilize the schema to strongly type the + output of a Dotprompt: + + ```ts + const menuPrompt = ai.prompt< + z.ZodTypeAny, // Input schema + typeof MenuItemSchema, // Output schema + z.ZodTypeAny // Custom options schema + >('menu'); + const { output } = await menuPrompt({ theme: 'medieval' }); + + // Now data is strongly typed as MenuItemSchema: + const dishName = output?.dishname; + const description = output?.description; + ``` + + + Schema references in Go are not yet implemented. Use Picoschema or JSON Schema directly in your `.prompt` files. + + + Not applicable - use standard schema patterns in Python code. + + + +## Prompt templates + +The portion of a `.prompt` file that follows the front matter (if present) is +the prompt itself, which will be passed to the model. While this prompt could be +a simple text string, very often you will want to incorporate user input into +the prompt. To do so, you can specify your prompt using the +Handlebars templating language. +Prompt templates can include placeholders that refer to the values defined by +your prompt's input schema. + +You already saw this in action in the section on input and output schemas: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + theme?: string + default: + theme: "pirate" +output: + schema: + dishname: string + description: string + calories: integer + allergens(array): string +--- +Invent a menu item for a {{theme}} themed restaurant. +``` + +In this example, the Handlebars expression, `{{theme}}`, +resolves to the value of the input's `theme` property when you run the +prompt. To pass input to the prompt: + + + + ```ts + const menuPrompt = ai.prompt('menu'); + const { output } = await menuPrompt({ theme: 'medieval' }); + ``` + + + ```go + menuPrompt := genkit.LookupPrompt(g, "menu") + + resp, err := menuPrompt.Execute(context.Background(), + ai.WithInput(map[string]any{"theme": "medieval"}), + ) + ``` + + + Not applicable - use standard templating patterns in Python code. + + + +Note that because the input schema declared the `theme` property to be optional +and provided a default, you could have omitted the property, +and the prompt would have resolved using the default value. + +Handlebars templates also support some limited logical constructs. For example, +as an alternative to providing a default, you could define the prompt using +Handlebars's `#if` helper: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + theme?: string +--- +Invent a menu item for a {{#if theme}}{{theme}} themed{{/if}} restaurant. +``` + +In this example, the prompt renders as "Invent a menu item for a restaurant" +when the `theme` property is unspecified. + +See the Handlebars +documentation for information on all of the built-in logical helpers. 
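For example, the built-in `#each` helper repeats a block of the template once for every element of an array. A short sketch, assuming an `ingredients` array field in the input schema:

```dotprompt
---
model: googleai/gemini-2.5-flash
input:
  schema:
    ingredients(array): string
---
Invent a dish that features the following ingredients:

{{#each ingredients}}
- {{this}}
{{/each}}
```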
+ +In addition to properties defined by your input schema, your templates can also +refer to values automatically defined by Genkit. The next few sections describe +these automatically-defined values and how you can use them. + +### Multi-message prompts + +By default, Dotprompt constructs a single message with a "user" role. +However, some prompts are best expressed as a combination of multiple +messages, such as a system prompt. + +The `{{role}}` helper provides a simple way to +construct multi-message prompts: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + userQuestion: string +--- +{{role "system"}} +You are a helpful AI assistant that really loves to talk about food. Try to work +food items into all of your conversations. +{{role "user"}} +{{userQuestion}} +``` + +Note that your final prompt must contain at least one `user` role. + +### Multi-modal prompts + +For models that support multimodal input, such as images alongside text, you can +use the `{{media}}` helper: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + photoUrl: string +--- +Describe this image in a detailed paragraph: + +{{media url=photoUrl}} +``` + +The URL can be `https:` or base64-encoded `data:` URIs for "inline" image usage. +In code, this would be: + + + + ```ts + const multimodalPrompt = ai.prompt('multimodal'); + const { text } = await multimodalPrompt({ + photoUrl: 'https://example.com/photo.jpg', + }); + ``` + + + ```go + multimodalPrompt := genkit.LookupPrompt(g, "multimodal") + + resp, err := multimodalPrompt.Execute(context.Background(), + ai.WithInput(map[string]any{"photoUrl": "https://example.com/photo.jpg"}), + ) + ``` + + + Not applicable - use standard multimodal patterns in Python code. + + + +See also [Multimodal input](/unified-docs/generating-content#multimodal-input), on the Generating content +page, for an example of constructing a `data:` URL. + +### Partials + +Partials are reusable templates that can be included inside any prompt. Partials +can be especially helpful for related prompts that share common behavior. + +When loading a prompt directory, any file prefixed with an underscore (`_`) is +considered a partial. So a file `_personality.prompt` might contain: + +```dotprompt +You should speak like a {{#if style}}{{style}}{{else}}helpful assistant.{{/if}}. +``` + +This can then be included in other prompts: + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + name: string + style?: string +--- + +{{role "system"}} +{{>personality style=style}} + +{{role "user"}} +Give the user a friendly greeting. + +User's Name: {{name}} +``` + +Partials are inserted using the +`{{>NAME_OF_PARTIAL args...}}` +syntax. If no arguments are provided to the partial, it executes with the same +context as the parent prompt. + +Partials accept both named arguments as above or a single positional argument +representing the context. This can be helpful for tasks such as rendering +members of a list. 
+ +**\_destination.prompt** + +```dotprompt +- {{name}} ({{country}}) +``` + +**chooseDestination.prompt** + +```dotprompt +--- +model: googleai/gemini-2.5-flash +input: + schema: + destinations(array): + name: string + country: string +--- +Help the user decide between these vacation destinations: + +{{#each destinations}} +{{>destination this}} +{{/each}} +``` + +#### Defining partials in code + +You can also define partials in code: + + + + ```ts + ai.definePartial('personality', 'Talk like a {{#if style}}{{style}}{{else}}helpful assistant{{/if}}.'); + ``` + + Code-defined partials are available in all prompts. + + + ```go + genkit.DefinePartial(g, "personality", "Talk like a {{#if style}}{{style}}{{else}}helpful assistant{{/if}}.") + ``` + + Code-defined partials are available in all prompts. + + + Not applicable - use standard templating patterns in Python code. + + + +### Defining Custom Helpers + +You can define custom helpers to process and manage data inside of a prompt. +Helpers are registered globally: + + + + ```ts + ai.defineHelper('shout', (text: string) => text.toUpperCase()); + ``` + + Once a helper is defined you can use it in any prompt: + + ```dotprompt + --- + model: googleai/gemini-2.5-flash + input: + schema: + name: string + --- + + HELLO, {{shout name}}!!! + ``` + + + ```go + genkit.DefineHelper(g, "shout", func(input string) string { + return strings.ToUpper(input) + }) + ``` + + Once a helper is defined you can use it in any prompt: + + ```dotprompt + --- + model: googleai/gemini-2.5-flash + input: + schema: + name: string + --- + + HELLO, {{shout name}}!!! + ``` + + + Not applicable - use standard templating patterns in Python code. + + + +## Prompt variants + +Because prompt files are just text, you can (and should!) commit them to your +version control system, allowing you to compare changes over time easily. Often, +tweaked versions of prompts can only be fully tested in a production environment +side-by-side with existing versions. Dotprompt supports this through its +variants feature. + +To create a variant, create a `[name].[variant].prompt` file. For instance, if +you were using Gemini 2.0 Flash in your prompt but wanted to see if Gemini 2.5 +Pro would perform better, you might create two files: + +- `my_prompt.prompt`: the "baseline" prompt +- `my_prompt.gemini25pro.prompt`: a variant named `gemini25pro` + +To use a prompt variant: + + + + Specify the variant option when loading: + + ```ts + const myPrompt = ai.prompt('my_prompt', { variant: 'gemini25pro' }); + ``` + + + Specify the variant in the prompt name when loading: + + ```go + myPrompt := genkit.LookupPrompt(g, "my_prompt.gemini25pro") + ``` + + + Not applicable - use standard prompt patterns in Python code. + + + +The name of the variant is included in the metadata of generation traces, so you +can compare and contrast actual performance between variants in the Genkit trace +inspector. + +## Defining prompts in code + +All of the examples discussed so far have assumed that your prompts are defined +in individual `.prompt` files in a single directory (or subdirectories thereof), +accessible to your app at runtime. Dotprompt is designed around this setup, and +its authors consider it to be the best developer experience overall. + +However, if you have use cases that are not well supported by this setup, +you can also define prompts in code: + + + + Use the `definePrompt()` function. 
The configuration object is analogous to the front matter block of a
    `.prompt` file; the prompt itself can be supplied either as a Handlebars
    template string (the `prompt` property), as in a prompt file, or as a
    function that returns the messages to send to the model (the `messages`
    property):

    ```ts
    const myPrompt = ai.definePrompt({
      name: 'myPrompt',
      model: 'googleai/gemini-2.5-flash',
      input: {
        schema: z.object({
          name: z.string(),
        }),
      },
      prompt: 'Hello, {{name}}. How are you today?',
    });
    ```

    ```ts
    const myPrompt = ai.definePrompt({
      name: 'myPrompt',
      model: 'googleai/gemini-2.5-flash',
      input: {
        schema: z.object({
          name: z.string(),
        }),
      },
      messages: async (input) => {
        return [
          {
            role: 'user',
            content: [{ text: `Hello, ${input.name}. How are you today?` }],
          },
        ];
      },
    });
    ```
  </TabItem>
  <TabItem label="Go">
    Use the `genkit.DefinePrompt()` function:

    ```go
    type GeoQuery struct {
      CountryCount int `json:"countryCount"`
    }

    type CountryList struct {
      Countries []string `json:"countries"`
    }

    geographyPrompt, err := genkit.DefinePrompt(
      g, "GeographyPrompt",
      ai.WithSystem("You are a geography teacher. Respond only when the user asks about geography."),
      ai.WithPrompt("Give me the {{countryCount}} biggest countries in the world by inhabitants."),
      ai.WithConfig(&googlegenai.GeminiConfig{Temperature: 0.5}),
      ai.WithInputType(GeoQuery{CountryCount: 10}), // Defaults to 10.
      ai.WithOutputType(CountryList{}),
    )
    if err != nil {
      log.Fatal(err)
    }

    resp, err := geographyPrompt.Execute(context.Background(), ai.WithInput(GeoQuery{CountryCount: 15}))
    if err != nil {
      log.Fatal(err)
    }

    var list CountryList
    if err := resp.Output(&list); err != nil {
      log.Fatal(err)
    }

    log.Printf("Countries: %s", list.Countries)
    ```

    Prompts may also be rendered into a `GenerateActionOptions` which may then be
    processed and passed into `genkit.GenerateWithRequest()`:

    ```go
    actionOpts, err := geographyPrompt.Render(ctx, ai.WithInput(GeoQuery{CountryCount: 15}))
    if err != nil {
      log.Fatal(err)
    }

    // Do something with the value...
    actionOpts.Config = &googlegenai.GeminiConfig{Temperature: 0.8}

    resp, err := genkit.GenerateWithRequest(ctx, g, actionOpts, nil, nil) // No middleware or streaming
    ```

    Note that all prompt options carry over to `GenerateActionOptions` with the
    exception of `WithMiddleware()`, which must be passed separately if using
    `Prompt.Render()` instead of `Prompt.Execute()`.
  </TabItem>
  <TabItem label="Python">
    Define prompts directly in your Python code using standard patterns:

    ```python
    # Define prompts as functions or templates
    def create_greeting_prompt(name: str) -> str:
        return f"Hello, {name}. How are you today?"
+ + # Use with generate + response = await ai.generate( + prompt=create_greeting_prompt("Alice"), + model="googleai/gemini-2.5-flash" + ) + ``` + + + +## Next steps + +- Learn about [tool calling](/unified-docs/tool-calling) to give your prompts access to external functions and APIs +- Explore [retrieval-augmented generation (RAG)](/unified-docs/rag) to incorporate external knowledge into your prompts +- See [creating flows](/unified-docs/creating-flows) to build complex AI workflows using your prompts +- Check out the [evaluation guide](/docs/evaluation) for testing and improving your prompt performance diff --git a/src/content/docs/unified-docs/evaluation.mdx b/src/content/docs/unified-docs/evaluation.mdx new file mode 100644 index 00000000..d620cd83 --- /dev/null +++ b/src/content/docs/unified-docs/evaluation.mdx @@ -0,0 +1,909 @@ +--- +title: Evaluation +description: Learn about Genkit's evaluation capabilities across JavaScript and Go, including inference-based and raw evaluation, dataset creation, and how to use the Developer UI and CLI for testing and analysis. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; +import ThemeImage from '../../../components/ThemeImage.astro'; + +Evaluation is a form of testing that helps you validate your LLM's responses and +ensure they meet your quality bar. + +Genkit supports third-party evaluation tools through plugins, paired +with powerful observability features that provide insight into the runtime state +of your LLM-powered applications. Genkit tooling helps you automatically extract +data including inputs, outputs, and information from intermediate steps to +evaluate the end-to-end quality of LLM responses as well as understand the +performance of your system's building blocks. + +## Types of evaluation + +Genkit supports two types of evaluation: + +- **Inference-based evaluation**: This type of evaluation runs against a + collection of pre-determined inputs, assessing the corresponding outputs for + quality. + + This is the most common evaluation type, suitable for most use cases. This approach tests a system's actual output for each evaluation run. + + You can perform the quality assessment manually, by visually inspecting the results. Alternatively, you can automate the assessment by using an evaluation metric. + +- **Raw evaluation**: This type of evaluation directly assesses the quality of + inputs without any inference. This approach typically is used with automated + evaluation using metrics. All required fields for evaluation (e.g., `input`, + `context`, `output` and `reference`) must be present in the input dataset. This + is useful when you have data coming from an external source (e.g., collected + from your production traces) and you want to have an objective measurement of + the quality of the collected data. + + For more information, see the [Advanced use](#advanced-use) section of this page. + +This section explains how to perform inference-based evaluation using Genkit. + +## Quick start + +### Setup + +1. Use an existing Genkit app or create a new one by following the Getting started guide for your language: + + + + Follow the [Get started](/docs/get-started) guide. + + + Follow the [Get started](/go/docs/get-started-go) guide. + + + Evaluation features are not yet available for Python. You can use external evaluation tools with Python Genkit applications. + + + +2. Add the following code to define a simple RAG application to evaluate. 
For this guide, we use a dummy retriever that always returns the same documents.

   <Tabs syncKey="language">
     <TabItem label="JavaScript">
       ```js
       import { genkit, z, Document } from 'genkit';
       import { googleAI } from '@genkit-ai/googleai';

       // Initialize Genkit
       export const ai = genkit({ plugins: [googleAI()] });

       // Dummy retriever that always returns the same docs
       export const dummyRetriever = ai.defineRetriever(
         {
           name: 'dummyRetriever',
         },
         async (i) => {
           const facts = ["Dog is man's best friend", 'Dogs have evolved and were domesticated from wolves'];
           // Just return facts as documents.
           return { documents: facts.map((t) => Document.fromText(t)) };
         },
       );

       // A simple question-answering flow
       export const qaFlow = ai.defineFlow(
         {
           name: 'qaFlow',
           inputSchema: z.object({ query: z.string() }),
           outputSchema: z.object({ answer: z.string() }),
         },
         async ({ query }) => {
           const factDocs = await ai.retrieve({
             retriever: dummyRetriever,
             query,
           });

           const { text } = await ai.generate({
             model: googleAI.model('gemini-2.5-flash'),
             prompt: `Answer this question with the given context: ${query}`,
             docs: factDocs,
           });
           return { answer: text };
         },
       );
       ```
     </TabItem>
     <TabItem label="Go">
       ```go
       package main

       import (
         "context"
         "fmt"
         "log"

         "github.com/firebase/genkit/go/ai"
         "github.com/firebase/genkit/go/genkit"
         "github.com/firebase/genkit/go/plugins/googlegenai"
       )

       func main() {
         ctx := context.Background()

         // Initialize Genkit
         g, err := genkit.Init(ctx,
           genkit.WithPlugins(&googlegenai.GoogleAI{}),
           genkit.WithDefaultModel("googleai/gemini-2.5-flash"),
         )
         if err != nil {
           log.Fatalf("Genkit initialization error: %v", err)
         }

         // Dummy retriever that always returns the same facts
         dummyRetrieverFunc := func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) {
           facts := []string{
             "Dog is man's best friend",
             "Dogs have evolved and were domesticated from wolves",
           }
           // Just return facts as documents.
           var docs []*ai.Document
           for _, fact := range facts {
             docs = append(docs, ai.DocumentFromText(fact, nil))
           }
           return &ai.RetrieverResponse{Documents: docs}, nil
         }
         factsRetriever := genkit.DefineRetriever(g, "local", "dogFacts", dummyRetrieverFunc)

         m := googlegenai.GoogleAIModel(g, "gemini-2.5-flash")
         if m == nil {
           log.Fatal("failed to find model")
         }

         // A simple question-answering flow
         genkit.DefineFlow(g, "qaFlow", func(ctx context.Context, query string) (string, error) {
           factDocs, err := ai.Retrieve(ctx, factsRetriever, ai.WithTextDocs(query))
           if err != nil {
             return "", fmt.Errorf("retrieval failed: %w", err)
           }
           llmResponse, err := genkit.Generate(ctx, g,
             ai.WithModelName("googleai/gemini-2.5-flash"),
             ai.WithPrompt("Answer this question with the given context: %s", query),
             ai.WithDocs(factDocs.Documents...),
           )
           if err != nil {
             return "", fmt.Errorf("generation failed: %w", err)
           }
           return llmResponse.Text(), nil
         })
       }
       ```
     </TabItem>
     <TabItem label="Python">
       Evaluation features are not yet available for Python. You can use external evaluation tools with Python Genkit applications.
     </TabItem>
   </Tabs>

3. (Optional) Add evaluation metrics to your application to use while evaluating:

   <Tabs syncKey="language">
     <TabItem label="JavaScript">
       This guide uses the `MALICIOUSNESS` metric from the `genkitEval` plugin.
+ + ```js + import { genkitEval, GenkitMetric } from '@genkit-ai/evaluator'; + import { googleAI } from '@genkit-ai/googleai'; + + export const ai = genkit({ + plugins: [ + googleAI(), + // Add this plugin to your Genkit initialization block + genkitEval({ + judge: googleAI.model('gemini-2.5-flash'), + metrics: [GenkitMetric.MALICIOUSNESS], + }), + ], + }); + ``` + + **Note:** The configuration above requires installation of the [`@genkit-ai/evaluator`](https://www.npmjs.com/package/@genkit-ai/evaluator) package. + + ```bash + npm install @genkit-ai/evaluator + ``` + + + This guide uses the `EvaluatorRegex` metric from the `evaluators` package. + + ```go + import ( + "github.com/firebase/genkit/go/plugins/evaluators" + ) + + func main() { + // ... + + metrics := []evaluators.MetricConfig{ + { + MetricType: evaluators.EvaluatorRegex, + }, + } + + // Initialize Genkit + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googlegenai.GoogleAI{}, + &evaluators.GenkitEval{Metrics: metrics}, // Add this plugin + ), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + } + ``` + + **Note:** Ensure that the `evaluators` package is installed in your go project: + + ```bash + go get github.com/firebase/genkit/go/plugins/evaluators + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +4. Start your Genkit application: + + + + ```bash + genkit start -- + ``` + + + ```bash + genkit start -- go run main.go + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +### Create a dataset + +Create a dataset to define the examples we want to use for evaluating our flow. + +1. Go to the Dev UI at `http://localhost:4000` and click the **Datasets** button + to open the Datasets page. + +2. Click on the **Create Dataset** button to open the create dataset dialog. + + a. Provide a `datasetId` for your new dataset. This guide uses + `myFactsQaDataset`. + + b. Select `Flow` dataset type. + + c. Leave the validation target field empty and click **Save** + +3. Your new dataset page appears, showing an empty dataset. Add examples to it by following these steps: + + a. Click the **Add example** button to open the example editor panel. + + b. Only the `input` field is required. Add the input data: + + + + Enter `{"query": "Who is man's best friend?"}` in the `input` field, and click **Save** to add the example to your dataset. + + Repeat this process to add more examples: + + ``` + {"query": "Can I give milk to my cats?"} + {"query": "From which animals did dogs evolve?"} + ``` + + + Enter `"Who is man's best friend?"` in the `Input` field, and click **Save** to add the example to your dataset. + + If you have configured the `EvaluatorRegex` metric and would like to try it out, you need to specify a Reference string that contains the pattern to match the output against. For the preceding input, set the `Reference output` text to `"(?i)dog"`, which is a case-insensitive regular-expression pattern to match the word "dog" in the flow output. + + Repeat this process to add more examples: + + ```text + "Can I give milk to my cats?" + "From which animals did dogs evolve?" + ``` + + If you are using the regular-expression evaluator, use the corresponding reference strings: + + ```text + "(?i)don't know" + "(?i)wolf|wolves" + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +By the end of this step, your dataset should have 3 examples in it, with the +values mentioned above. 
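
If you prefer keeping test inputs in a file, the same three examples expressed
in the JSON format accepted by the CLI (see
[Evaluation using the CLI](#evaluation-using-the-cli) later on this page) would
look something like this for the JavaScript flow above:

```json
[
  { "input": { "query": "Who is man's best friend?" } },
  { "input": { "query": "Can I give milk to my cats?" } },
  { "input": { "query": "From which animals did dogs evolve?" } }
]
```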

### Run evaluation and view results

To start evaluating the flow, click the **Run new evaluation** button on your
dataset page. You can also start a new evaluation from the _Evaluations_ tab.

1. Select the `Flow` radio button to evaluate a flow.

2. Select `qaFlow` as the target flow to evaluate.

3. Select `myFactsQaDataset` as the target dataset to use for evaluation.

4. (Optional) If you have installed an evaluator metric using Genkit plugins,
   you can see these metrics on this page. Select the metrics that you want to use
   with this evaluation run. This is entirely optional: omitting this step will
   still return the results in the evaluation run, but without any associated
   metrics.

5. Finally, click **Run evaluation** to start evaluation. Depending on the flow
   you're testing, this may take a while. Once the evaluation is complete, a
   success message appears with a link to view the results. Click on the link to go
   to the _Evaluation details_ page.

You can see the details of your evaluation on this page, including the original
input, extracted context, and metrics (if any).

## Core concepts

### Terminology

- **Evaluation**: An evaluation is a process that assesses system performance. In Genkit, such a system is usually a Genkit primitive, such as a flow or a
  model. An evaluation can be automated or manual (human evaluation).

- **Bulk inference**: Inference is the act of running an input on a flow or model to get the corresponding output. Bulk inference involves performing inference on multiple inputs simultaneously.

- **Metric**: An evaluation metric is a criterion on which an inference is scored. Examples include accuracy, faithfulness, maliciousness, whether the output is in English, etc.

- **Dataset**: A dataset is a collection of examples to use for inference-based
  evaluation. A dataset typically consists of `input` and optional `reference`
  fields. The `reference` field does not affect the inference step of evaluation
  but it is passed verbatim to any evaluation metrics. In Genkit, you can create a
  dataset through the Dev UI. There are two types of datasets in Genkit: _Flow_
  datasets and _Model_ datasets.

### Schema validation

Depending on the type, datasets have schema validation support in the Dev UI:

- Flow datasets support validation of the `input` and `reference` fields of the dataset against a flow in the Genkit application. Schema validation is optional and is only enforced if a schema is specified on the target flow.

- Model datasets have an implicit schema, supporting both `string` and `GenerateRequest` input types. String validation provides a convenient way to evaluate simple text prompts, while `GenerateRequest` provides complete control for advanced use cases (e.g. providing model parameters, message history, tools, etc).

Note: Schema validation is a helper tool for editing examples, but it is
possible to save an example with an invalid schema. These examples may fail when
running an evaluation.
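
To make the validation concrete: with the JavaScript `qaFlow` defined earlier, the
Dev UI checks each example's `input` against the flow's input schema. The
following sketch uses Zod directly to illustrate the kind of check that is
applied (it is not what the Dev UI literally runs):

```ts
import { z } from 'genkit';

// The input schema declared on qaFlow in the Quick start.
const qaFlowInput = z.object({ query: z.string() });

// A dataset example that passes validation:
console.log(qaFlowInput.safeParse({ query: "Who is man's best friend?" }).success); // true

// An example that fails validation, because `query` must be a string:
console.log(qaFlowInput.safeParse({ query: 42 }).success); // false
```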
+ +## Supported evaluators + +### Genkit evaluators + +Genkit includes built-in evaluators to help you get started: + + + + Genkit includes a small number of native evaluators, inspired by [RAGAS](https://docs.ragas.io/en/stable/): + + - **Faithfulness** -- Measures the factual consistency of the generated answer against the given context + - **Answer Relevancy** -- Assesses how pertinent the generated answer is to the given prompt + - **Maliciousness** -- Measures whether the generated output intends to deceive, harm, or exploit + + + Genkit includes a small number of built-in evaluators, ported from the [JS evaluators plugin](https://js.api.genkit.dev/enums/_genkit-ai_evaluator.GenkitMetric.html): + + - **EvaluatorDeepEqual** -- Checks if the generated output is deep-equal to the reference output provided. + - **EvaluatorRegex** -- Checks if the generated output matches the regular expression provided in the reference field. + - **EvaluatorJsonata** -- Checks if the generated output matches the [JSONATA](https://jsonata.org/) expression provided in the reference field. + + + Evaluation features are not yet available for Python. + + + +### Evaluator plugins + +Genkit supports additional evaluators through plugins: + + + + - [Vertex Rapid Evaluators](/docs/plugins/vertex-ai#evaluators) via the VertexAI Plugin + - Custom evaluators through the plugin system + + + - Custom evaluators through the plugin system + - Third-party evaluation tools through plugins + + + Not applicable - evaluation features are not yet available for Python. + + + +## Advanced use + +### Evaluation comparison + +The Developer UI offers visual tools for side-by-side comparison of multiple +evaluation runs. This feature allows you to analyze variations across different +executions within a unified interface, making it easier to assess changes in +output quality. Additionally, you can highlight outputs based on the performance +of specific metrics, indicating improvements or regressions. + +When comparing evaluations, one run is designated as the _Baseline_. All other +evaluations are compared against this baseline to determine whether their +performance has improved or regressed. + + + +#### Prerequisites + +To use the evaluation comparison feature, the following conditions must be met: + +- Evaluations must originate from a dataset source. Evaluations from file + sources are not comparable. +- All evaluations being compared must be from the same dataset. +- For metric highlighting, all evaluations must use at least one common + metric that produces a `number` or `boolean` score. + +#### Comparing evaluations + +1. Ensure you have at least two evaluation runs performed on the same dataset. + For instructions, refer to the + [Run evaluation section](#run-evaluation-and-view-results). + +2. In the Developer UI, navigate to the **Datasets** page. + +3. Select the relevant dataset and open its **Evaluations** tab. You should see + all evaluation runs associated with that dataset. + +4. Choose one evaluation to serve as the baseline for comparison. + +5. On the evaluation results page, click the **+ Comparison** button. If this + button is disabled, it means no other comparable evaluations are available + for this dataset. + +6. A new column will appear with a dropdown menu. Select another evaluation + from this menu to load its results alongside the baseline. + +You can now view the outputs side-by-side to visually inspect differences in +quality. This feature supports comparing up to three evaluations simultaneously. 
+ +##### Metric highlighting (Optional) + +If your evaluations include metrics, you can enable metric highlighting to +color-code the results. This feature helps you quickly identify changes in +performance: improvements are colored green, while regressions are red. + +Note that highlighting is only supported for numeric and boolean metrics, and +the selected metric must be present in all evaluations being compared. + +To enable metric highlighting: + +1. After initiating a comparison, a **Choose a metric to compare** menu will + become available. + +2. Select a metric from the dropdown. By default, lower scores (for numeric + metrics) and `false` values (for boolean metrics) are considered + improvements and highlighted in green. You can reverse this logic by + ticking the checkbox in the menu. + +The comparison columns will now be color-coded according to the selected metric +and configuration, providing an at-a-glance overview of performance changes. + +### Evaluation using the CLI + +Genkit CLI provides a rich API for performing evaluation. This is especially +useful in environments where the Dev UI is not available (e.g. in a CI/CD +workflow). + +Genkit CLI provides 3 main evaluation commands: `eval:flow`, `eval:extractData`, +and `eval:run`. + +#### `eval:flow` command + +The `eval:flow` command runs inference-based evaluation on an input dataset. +This dataset may be provided either as a JSON file or by referencing an existing +dataset in your Genkit runtime. + +```bash +# Referencing an existing dataset +genkit eval:flow qaFlow --input myFactsQaDataset + +# or, using a dataset from a file +genkit eval:flow qaFlow --input testInputs.json +``` + +Note: Make sure that you start your genkit app before running these CLI +commands. + + + + ```bash + genkit start -- + ``` + + + ```bash + genkit start -- go run main.go + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +Here, `testInputs.json` should be an array of objects containing an `input` +field and an optional `reference` field: + + + + ```json + [ + { + "input": { "query": "What is the French word for Cheese?" } + }, + { + "input": { "query": "What green vegetable looks like cauliflower?" }, + "reference": "Broccoli" + } + ] + ``` + + + ```json + [ + { + "input": "What is the French word for Cheese?" + }, + { + "input": "What green vegetable looks like cauliflower?", + "reference": "Broccoli" + } + ] + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +If your flow requires auth, you may specify it using the `--context` argument: + +```bash +genkit eval:flow qaFlow --input testInputs.json --context '{"auth": {"email_verified": true}}' +``` + +By default, the `eval:flow` and `eval:run` commands use all available metrics +for evaluation. To run on a subset of the configured evaluators, use the +`--evaluators` flag and provide a comma-separated list of evaluators by name: + + + + ```bash + genkit eval:flow qaFlow --input testInputs.json --evaluators=genkitEval/maliciousness,genkitEval/answer_relevancy + ``` + + + ```bash + genkit eval:flow qaFlow --input testInputs.json --evaluators=genkitEval/regex,genkitEval/jsonata + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +You can view the results of your evaluation run in the Dev UI at +`localhost:4000/evaluate`. 
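
Putting these options together, a scripted run (for example, in a CI job) that
uses the flow, dataset, and evaluator configured earlier in this guide might
look like this:

```bash
genkit eval:flow qaFlow \
  --input myFactsQaDataset \
  --evaluators=genkitEval/maliciousness \
  --context '{"auth": {"email_verified": true}}'
```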
+ +#### `eval:extractData` and `eval:run` commands + +To support _raw evaluation_, Genkit provides tools to extract data from traces +and run evaluation metrics on extracted data. This is useful, for example, if +you are using a different framework for evaluation or if you are collecting +inferences from a different environment to test locally for output quality. + +You can batch run your Genkit flow and add a unique label to the run which then +can be used to extract an _evaluation dataset_. A raw evaluation dataset is a +collection of inputs for evaluation metrics, _without_ running any prior +inference. + +Run your flow over your test inputs: + + + + ```bash + genkit flow:batchRun qaFlow testInputs.json --label firstRunSimple + ``` + + + ```bash + genkit flow:batchRun qaFlow testInputs.json + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +Extract the evaluation data: + + + + ```bash + genkit eval:extractData qaFlow --label firstRunSimple --output factsEvalDataset.json + ``` + + + ```bash + genkit eval:extractData qaFlow --maxRows 2 --output factsEvalDataset.json + ``` + + + Not applicable - evaluation features are not yet available for Python. + + + +The exported data has a format different from the dataset format presented +earlier. This is because this data is intended to be used with evaluation +metrics directly, without any inference step. Here is the syntax of the +extracted data. + +```json +Array<{ + "testCaseId": string, + "input": any, + "output": any, + "context": any[], + "traceIds": string[], +}>; +``` + +The data extractor automatically locates retrievers and adds the produced docs +to the context array. You can run evaluation metrics on this extracted dataset +using the `eval:run` command. + +```bash +genkit eval:run factsEvalDataset.json +``` + +By default, `eval:run` runs against all configured evaluators, and as with +`eval:flow`, results for `eval:run` appear in the evaluation page of Developer +UI, located at `localhost:4000/evaluate`. + +### Batching evaluations + + + + :::note + This feature is only available in the Node.js SDK. + ::: + + You can speed up evaluations by processing the inputs in batches using the CLI and Dev UI. When batching is enabled, the input data is grouped into batches of size `batchSize`. The data points in a batch are all run in parallel to provide significant performance improvements, especially when dealing with large datasets and/or complex evaluators. By default (when the flag is omitted), batching is disabled. + + The `batchSize` option has been integrated into the `eval:flow` and `eval:run` CLI commands. When a `batchSize` greater than 1 is provided, the evaluator will process the dataset in chunks of the specified size. This feature only affects the evaluator logic and not inference (when using `eval:flow`). Here are some examples of enabling batching with the CLI: + + ```bash + genkit eval:flow myFlow --input yourDataset.json --evaluators=custom/myEval --batchSize 10 + ``` + + Or, with `eval:run` + + ```bash + genkit eval:run yourDataset.json --evaluators=custom/myEval --batchSize 10 + ``` + + Batching is also available in the Dev UI for Genkit (JS) applications. You can set batch size when running a new evaluation, to enable parallelization. + + + Batching features are not yet available for Go. Evaluations run sequentially. + + + Not applicable - evaluation features are not yet available for Python. 
+ + + +### Custom extractors + + + + Genkit provides reasonable default logic for extracting the necessary fields + (`input`, `output` and `context`) while doing an evaluation. However, you may + find that you need more control over the extraction logic for these fields. + Genkit supports customs extractors to achieve this. You can provide custom + extractors to be used in `eval:extractData` and `eval:flow` commands. + + First, as a preparatory step, introduce an auxiliary step in our `qaFlow` + example: + + ```js + export const qaFlow = ai.defineFlow( + { + name: 'qaFlow', + inputSchema: z.object({ query: z.string() }), + outputSchema: z.object({ answer: z.string() }), + }, + async ({ query }) => { + const factDocs = await ai.retrieve({ + retriever: dummyRetriever, + query, + }); + const factDocsModified = await ai.run('factModified', async () => { + // Let us use only facts that are considered silly. This is a + // hypothetical step for demo purposes, you may perform any + // arbitrary task inside a step and reference it in custom + // extractors. + // + // Assume you have a method that checks if a fact is silly + return factDocs.filter((d) => isSillyFact(d.text)); + }); + + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Answer this question with the given context ${query}`, + docs: factDocsModified, + }); + return { answer: text }; + }, + ); + ``` + + Next, configure a custom extractor to use the output of the `factModified` step + when evaluating this flow. + + If you don't have one a tools-config file to configure custom extractors, add + one named `genkit-tools.conf.js` to your project root. + + ```bash + cd /path/to/your/genkit/app + + touch genkit-tools.conf.js + ``` + + In the tools config file, add the following code: + + ```js + module.exports = { + evaluators: [ + { + actionRef: '/flow/qaFlow', + extractors: { + context: { outputOf: 'factModified' }, + }, + }, + ], + }; + ``` + + This config overrides the default extractors of Genkit's tooling, specifically + changing what is considered as `context` when evaluating this flow. + + Running evaluation again reveals that context is now populated as the output of + the step `factModified`. + + ```bash + genkit eval:flow qaFlow --input testInputs.json + ``` + + Evaluation extractors are specified as follows: + + - `evaluators` field accepts an array of EvaluatorConfig objects, which are + scoped by `flowName` + - `extractors` is an object that specifies the extractor overrides. The + current supported keys in `extractors` are `[input, output, context]`. The + acceptable value types are: + - `string` - this should be a step name, specified as a string. The output + of this step is extracted for this key. + - `{ inputOf: string }` or `{ outputOf: string }` - These objects + represent specific channels (input or output) of a step. For example, `{ + inputOf: 'foo-step' }` would extract the input of step `foo-step` for + this key. + - `(trace) => string;` - For further flexibility, you can provide a + function that accepts a Genkit trace and returns an `any`-type value, + and specify the extraction logic inside this function. Refer to + `genkit/genkit-tools/common/src/types/trace.ts` for the exact TraceData + schema. + + **Note:** The extracted data for all these extractors is the type corresponding + to the extractor. For example, if you use context: `{ outputOf: 'foo-step' }`, + and `foo-step` returns an array of objects, the extracted context is also an + array of objects. 
+ + + Custom extractors are not yet available for Go. Use the default extraction logic provided by Genkit. + + + Not applicable - evaluation features are not yet available for Python. + + + +### Synthesizing test data using an LLM + + + + Here is an example flow that uses a PDF file to generate potential user + questions. + + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import { chunk } from 'llm-chunk'; // npm install llm-chunk + import path from 'path'; + import { readFile } from 'fs/promises'; + import pdf from 'pdf-parse'; // npm install pdf-parse + + const ai = genkit({ plugins: [googleAI()] }); + + const chunkingConfig = { + minLength: 1000, // number of minimum characters into chunk + maxLength: 2000, // number of maximum characters into chunk + splitter: 'sentence', // paragraph | sentence + overlap: 100, // number of overlap chracters + delimiters: '', // regex for base split method + } as any; + + async function extractText(filePath: string) { + const pdfFile = path.resolve(filePath); + const dataBuffer = await readFile(pdfFile); + const data = await pdf(dataBuffer); + return data.text; + } + + export const synthesizeQuestions = ai.defineFlow( + { + name: 'synthesizeQuestions', + inputSchema: z.object({ filePath: z.string().describe('PDF file path') }), + outputSchema: z.object({ + questions: z.array( + z.object({ + query: z.string(), + }), + ), + }), + }, + async ({ filePath }) => { + filePath = path.resolve(filePath); + // `extractText` loads the PDF and extracts its contents as text. + const pdfTxt = await ai.run('extract-text', () => extractText(filePath)); + + const chunks = await ai.run('chunk-it', async () => chunk(pdfTxt, chunkingConfig)); + + const questions = []; + for (var i = 0; i < chunks.length; i++) { + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: { + text: `Generate one question about the following text: ${chunks[i]}`, + }, + }); + questions.push({ query: text }); + } + return { questions }; + }, + ); + ``` + + You can then use this command to export the data into a file and use for + evaluation. + + ```bash + genkit flow:run synthesizeQuestions '{"filePath": "my_input.pdf"}' --output synthesizedQuestions.json + ``` + + + Test data synthesis features are not yet available for Go. You can create test datasets manually or use external tools to generate evaluation data. + + + Not applicable - evaluation features are not yet available for Python. + + + +## Next steps + +- Learn about [creating flows](/unified-docs/creating-flows) to build AI workflows that can be evaluated +- Explore [retrieval-augmented generation (RAG)](/unified-docs/rag) for building knowledge-based systems that benefit from evaluation +- See [tool calling](/unified-docs/tool-calling) for creating AI agents that can be tested with evaluation metrics +- Check out the [developer tools documentation](/docs/devtools) for more information about the Genkit Developer UI diff --git a/src/content/docs/unified-docs/generating-content.mdx b/src/content/docs/unified-docs/generating-content.mdx new file mode 100644 index 00000000..1a3547c8 --- /dev/null +++ b/src/content/docs/unified-docs/generating-content.mdx @@ -0,0 +1,1084 @@ +--- +title: Generating content with AI models +description: Learn how to generate content with AI models using Genkit's unified interface across JavaScript, Go, and Python, covering basic usage, configuration, structured output, streaming, and multimodal input/output. 
+--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LLMSummary from '@/components/llm-summary.astro'; +import ExampleLink from '@/components/ExampleLink.astro'; + + +Genkit provides a unified interface to interact with various generative AI models (LLMs, image generation) across JavaScript, Go, and Python. + +**Core Function:** `ai.generate()` (JS), `genkit.Generate()` (Go), `ai.generate()` (Python) + +**Basic Usage:** + + + + ```typescript + import { googleAI } from '@genkit-ai/googleai'; + import { genkit } from 'genkit'; + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), // Default model + }); + + // Generate with default model + const response1 = await ai.generate('prompt text'); + console.log(response1.text); + + // Generate with specific model reference + const response2 = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: 'prompt text', + }); + console.log(response2.text); + + // Generate with model string ID + const response3 = await ai.generate({ + model: 'googleai/gemini-2.5-flash', + prompt: 'prompt text', + }); + console.log(response3.text); + ``` + + + ```go + import ( + "context" + "log" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatalf("could not initialize Genkit: %v", err) + } + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("prompt text"), + ) + if err != nil { + log.Fatalf("could not generate: %v", err) + } + log.Println(resp.Text()) + } + ``` + + + ```python + import asyncio + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai + + ai = Genkit( + plugins=[GoogleGenai()], + model='googleai/gemini-2.5-flash', + ) + + async def main() -> None: + result = await ai.generate( + prompt='prompt text', + ) + print(result.text) + + ai.run_main(main()) + ``` + + + +**Configuration:** + +- **System Prompt:** `system: "Instruction for the model"` +- **Model Parameters:** `config: { maxOutputTokens: 512, temperature: 1.0, topP: 0.95, topK: 40, stopSequences: ["\n"] }` + +**Key Concepts:** + +- **Flexibility:** Easily swap models (`model` parameter). +- **Schema validation:** For defining and validating structured output schemas. +- **Streaming:** For real-time output using `generateStream`. +- **Multimodality:** Handle text, image, video, audio inputs (model-dependent). +- **Media Generation:** Create images, etc. (model-dependent). + + + +At the heart of generative AI are AI _models_. Currently, the two most prominent +examples of generative models are large language models (LLMs) and image +generation models. These models take input, called a _prompt_ (most commonly +text, an image, or a combination of both), and from it produce as output text, +an image, or even audio or video. + +The output of these models can be surprisingly convincing: LLMs generate text +that appears as though it could have been written by a human being, and image +generation models can produce images that are very close to real photographs or +artwork created by humans. 

In addition, LLMs have proven capable of tasks beyond simple text generation:

- Writing computer programs
- Planning subtasks that are required to complete a larger task
- Organizing unorganized data
- Understanding and extracting information from a corpus of text
- Following and performing automated activities based on a text description of
  the activity

There are many models available to you, from several different providers. Each
model has its own strengths and weaknesses, and one model might excel at one task
but perform less well at others. Apps making use of generative AI can often
benefit from using multiple different models depending on the task at hand.

As an app developer, you typically don't interact with generative AI
models directly, but rather through services available as web APIs.
Although these services often have similar functionality, they all provide it
through different and incompatible APIs. If you want to make use of multiple
model services, you have to use each of their proprietary SDKs, potentially
incompatible with each other. And if you want to upgrade from one model to the
newest and most capable one, you might have to build that integration all over
again.

Genkit addresses this challenge by providing a single interface that abstracts
away the details of accessing potentially any generative AI model service, with
several pre-built implementations already available. Building your AI-powered
app around Genkit simplifies the process of making your first generative AI call
and makes it equally easy to combine multiple models or swap one model for
another as new models emerge.

### Before you begin

If you want to run the code examples on this page, first complete the steps in
the Getting started guide for your language. All of the examples assume that you
have already installed Genkit as a dependency in your project.

<Tabs syncKey="language">
  <TabItem label="JavaScript">
    Complete the [Getting started](/docs/get-started) guide.
  </TabItem>
  <TabItem label="Go">
    Complete the [Get started](/go/docs/get-started-go) guide.
  </TabItem>
  <TabItem label="Python">
    Complete the [Get started](/python/docs/get-started) guide.
  </TabItem>
</Tabs>

### Models supported by Genkit

Genkit is designed to be flexible enough to use potentially any generative AI
model service. Its core libraries define the common interface for working with
models, and model plugins define the implementation details for working with a
specific model and its API.
+ +The Genkit team maintains plugins for working with models provided by Vertex AI, +Google Generative AI, and Ollama: + +- Gemini family of LLMs, through the + [Google Cloud Vertex AI plugin](/docs/plugins/vertex-ai) and [Google AI plugin](/docs/plugins/google-genai) +- Imagen2 and Imagen3 image generation models, through Google Cloud Vertex AI +- Anthropic's Claude 3 family of LLMs, through Google Cloud Vertex AI's model + garden +- Gemma 2, Llama 3, and many more open models, through the [Ollama + plugin](/docs/plugins/ollama) (you must host the Ollama server yourself) +- GPT, Dall-E and Whisper family of models, through the [OpenAI plugin](/docs/plugins/openai) +- Grok family of models, through the [xAI plugin](/docs/plugins/xai) +- DeepSeek Chat and DeepSeek Reasoner models, through the [DeepSeek plugin](/docs/plugins/deepseek) + +In addition, there are also several community-supported plugins that provide +interfaces to these models: + +- Claude 3 family of LLMs, through the [Anthropic plugin](https://thefireco.github.io/genkit-plugins/docs/plugins/genkitx-anthropic) +- GPT family of LLMs through the [Azure OpenAI plugin](https://thefireco.github.io/genkit-plugins/docs/plugins/genkitx-azure-openai) +- Command R family of LLMs through the [Cohere plugin](https://thefireco.github.io/genkit-plugins/docs/plugins/genkitx-cohere) +- Mistral family of LLMs through the [Mistral plugin](https://thefireco.github.io/genkit-plugins/docs/plugins/genkitx-mistral) +- Gemma 2, Llama 3, and many more open models hosted on Groq, through the + [Groq plugin](https://thefireco.github.io/genkit-plugins/docs/plugins/genkitx-groq) + +You can discover more by searching for packages tagged with `genkit-model` on your language's package registry. + +### Loading and configuring model plugins + +Before you can use Genkit to start generating content, you need to load and +configure a model plugin. If you're coming from the Getting Started guide, +you've already done this. Otherwise, see the Getting Started guide or the individual plugin's documentation and follow the steps there before +continuing. + +### The generate() method + +In Genkit, the primary interface through which you interact with generative AI +models is the `generate()` method. + +The simplest `generate()` call specifies the model you want to use and a text +prompt: + + + + ```ts + import { googleAI } from '@genkit-ai/googleai'; + import { genkit } from 'genkit'; + + const ai = genkit({ + plugins: [googleAI()], + // Optional. Specify a default model. 
+ model: googleAI.model('gemini-2.5-flash'), + }); + + async function run() { + const response = await ai.generate('Invent a menu item for a restaurant with a pirate theme.'); + console.log(response.text); + } + + run(); + ``` + + + ```go + package main + + import ( + "context" + "log" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatalf("could not initialize Genkit: %v", err) + } + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Invent a menu item for a pirate themed restaurant."), + ) + if err != nil { + log.Fatalf("could not generate model response: %v", err) + } + + log.Println(resp.Text()) + } + ``` + + + ```python + import asyncio + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai + + ai = Genkit( + plugins=[GoogleGenai()], + model='googleai/gemini-2.5-flash', + ) + + async def main() -> None: + result = await ai.generate( + prompt='Invent a menu item for a pirate themed restaurant.', + ) + print(result.text) + + ai.run_main(main()) + ``` + + + +When you run this brief example, it will print out some debugging information +followed by the output of the `generate()` call, which will usually be Markdown +text as in the following example: + +```md +## The Blackheart's Bounty + +**A hearty stew of slow-cooked beef, spiced with rum and molasses, served in a +hollowed-out cannonball with a side of crusty bread and a dollop of tangy +pineapple salsa.** + +**Description:** This dish is a tribute to the hearty meals enjoyed by pirates +on the high seas. The beef is tender and flavorful, infused with the warm spices +of rum and molasses. The pineapple salsa adds a touch of sweetness and acidity, +balancing the richness of the stew. The cannonball serving vessel adds a fun and +thematic touch, making this dish a perfect choice for any pirate-themed +adventure. +``` + +Run the script again and you'll get a different output. + +The preceding code sample sent the generation request to the default model, +which you specified when you configured the Genkit instance. + +You can also specify a model for a single `generate()` call: + + + + ```ts + import { googleAI } from '@genkit-ai/googleai'; + + const response = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: 'Invent a menu item for a restaurant with a pirate theme.', + }); + ``` + + This example uses a model reference function provided by the model plugin. You can also specify the model using a string identifier: + + ```ts + const response = await ai.generate({ + model: 'googleai/gemini-2.5-flash-001', + prompt: 'Invent a menu item for a restaurant with a pirate theme.', + }); + ``` + + + ```go + resp, err := genkit.Generate(ctx, g, + ai.WithModelName("googleai/gemini-2.5-pro"), + ai.WithPrompt("Invent a menu item for a pirate themed restaurant."), + ) + ``` + + + ```python + result = await ai.generate( + prompt='Invent a menu item for a pirate themed restaurant.', + model='googleai/gemini-2.0-pro', + ) + ``` + + + +A model string identifier looks like `providerid/modelid`, where the provider ID +(in this case, `googleai`) identifies the plugin, and the model ID is a +plugin-specific string identifier for a specific version of a model. 

These examples also illustrate an important point: when you use
`generate()` to make generative AI model calls, changing the model you want to
use is simply a matter of passing a different value to the model parameter. By
using `generate()` instead of the native model SDKs, you give yourself the
flexibility to more easily use several different models in your app and change
models in the future.

So far you have only seen examples of the simplest `generate()` calls. However,
`generate()` also provides an interface for more advanced interactions with
generative models, which you will see in the sections that follow.

### System prompts

Some models support providing a _system prompt_, which gives the model
instructions as to how you want it to respond to messages from the user. You can
use the system prompt to specify a persona you want the model to adopt, the tone
of its responses, the format of its responses, and so on.

If the model you're using supports system prompts, you can provide one:

<Tabs syncKey="language">
  <TabItem label="JavaScript">
    ```ts
    const response = await ai.generate({
      prompt: 'What is your quest?',
      system: "You are a knight from Monty Python's Flying Circus.",
    });
    ```
  </TabItem>
  <TabItem label="Go">
    ```go
    resp, err := genkit.Generate(ctx, g,
      ai.WithSystem("You are a food industry marketing consultant."),
      ai.WithPrompt("Invent a menu item for a pirate themed restaurant."),
    )
    ```

    For models that don't support system prompts, `ai.WithSystem()` simulates it by
    modifying the request to appear _like_ a system prompt.
  </TabItem>
  <TabItem label="Python">
    ```python
    result = await ai.generate(
        system='You are a food industry marketing consultant.',
        prompt='Invent a menu item for a pirate themed restaurant.',
    )
    ```
  </TabItem>
</Tabs>

### Multi-turn conversations with messages

For multi-turn conversations, you can use the `messages` parameter instead of `prompt` to provide a conversation history. This is particularly useful when you need to maintain context across multiple interactions with the model.

<Tabs syncKey="language">
  <TabItem label="JavaScript">
    The `messages` parameter accepts an array of message objects, where each message has a `role` (one of `'system'`, `'user'`, `'model'`, or `'tool'`) and `content`:

    ```ts
    const response = await ai.generate({
      messages: [
        { role: 'user', content: 'Hello, can you help me plan a trip?' },
        { role: 'model', content: 'Of course! I\'d be happy to help you plan a trip. Where are you thinking of going?' },
        { role: 'user', content: 'I want to visit Japan for two weeks in spring.' }
      ],
    });
    ```

    You can also combine `messages` with other parameters like `system` prompts:

    ```ts
    const response = await ai.generate({
      system: 'You are a helpful travel assistant.',
      messages: [
        { role: 'user', content: 'What should I pack for Japan in spring?' }
      ],
    });
    ```

    **When to use `messages` vs. Chat API:**

    - Use the `messages` parameter for simple multi-turn conversations where you manually manage the conversation history
    - For persistent chat sessions with automatic history management, use the [Chat API](/docs/chat) instead
  </TabItem>
  <TabItem label="Go">
    ```go
    resp, err := genkit.Generate(ctx, g,
      ai.WithModelName("googleai/gemini-2.5-flash"),
      ai.WithMessages(
        ai.NewUserMessage(
          ai.NewTextPart("Hello, can you help me plan a trip?"),
        ),
        ai.NewModelMessage(
          ai.NewTextPart("Of course! I'd be happy to help you plan a trip. Where are you thinking of going?"),
        ),
        ai.NewUserMessage(
          ai.NewTextPart("I want to visit Japan for two weeks in spring."),
        ),
      ),
    )
    ```
  </TabItem>
  <TabItem label="Python">
    ```python
    # Multi-turn conversation support varies by Python implementation
    # Check the specific plugin documentation for message handling
    result = await ai.generate(
        prompt='Continue our conversation about trip planning to Japan.',
    )
    ```
  </TabItem>
</Tabs>

### Model parameters

The `generate()` function takes a `config` parameter, through which you can
specify optional settings that control how the model generates content:

<Tabs syncKey="language">
  <TabItem label="JavaScript">
    ```ts
    const response = await ai.generate({
      prompt: 'Invent a menu item for a restaurant with a pirate theme.',
      config: {
        maxOutputTokens: 512,
        stopSequences: ['\n'],
        temperature: 1.0,
        topP: 0.95,
        topK: 40,
      },
    });
    ```
  </TabItem>
  <TabItem label="Go">
    ```go
    resp, err := genkit.Generate(ctx, g,
      ai.WithModelName("googleai/gemini-2.5-flash"),
      ai.WithPrompt("Invent a menu item for a pirate themed restaurant."),
      ai.WithConfig(&googlegenai.GeminiConfig{
        MaxOutputTokens: 500,
        StopSequences:   []string{"<end>", "<fin>"},
        Temperature:     0.5,
        TopP:            0.4,
        TopK:            50,
      }),
    )
    ```
  </TabItem>
  <TabItem label="Python">
    ```python
    result = await ai.generate(
        prompt='Invent a menu item for a pirate themed restaurant.',
        config={
            'max_output_tokens': 400,
            'stop_sequences': ['<end>', '<fin>'],
            'temperature': 1.2,
            'top_p': 0.4,
            'top_k': 50,
        },
    )
    ```
  </TabItem>
</Tabs>

The exact parameters that are supported depend on the individual model and model
API. However, the parameters in the previous example are common to almost every
model. The following is an explanation of these parameters:

#### Parameters that control output length

**maxOutputTokens**

LLMs operate on units called _tokens_. A token usually, but does not
necessarily, map to a specific sequence of characters. When you pass a prompt to
a model, one of the first steps it takes is to _tokenize_ your prompt string
into a sequence of tokens. Then, the LLM generates a sequence of tokens from the
tokenized input. Finally, the sequence of tokens gets converted back into text,
which is your output.

The maximum output tokens parameter simply sets a limit on how many tokens to
generate using the LLM. Every model potentially uses a different tokenizer, but
a good rule of thumb is to consider a single English word to be made of 2 to 4
tokens.

As stated earlier, some tokens might not map to character sequences. One such
example is that there is often a token that indicates the end of the sequence:
when an LLM generates this token, it stops generating more. Therefore, it's
possible and often the case that an LLM generates fewer tokens than the maximum
because it generated the "stop" token.

**stopSequences**

You can use this parameter to set the tokens or token sequences that, when
generated, indicate the end of LLM output. The correct values to use here
generally depend on how the model was trained, and are usually set by the model
plugin. However, if you have prompted the model to generate another stop
sequence, you might specify it here.

Note that you are specifying character sequences, and not tokens per se. In most
cases, you will specify a character sequence that the model's tokenizer maps to
a single token.

#### Parameters that control "creativity"

The _temperature_, _top-p_, and _top-k_ parameters together control how
"creative" you want the model to be.
Below are very brief explanations of what +these parameters mean, but the more important point to take away is this: these +parameters are used to adjust the character of an LLM's output. The optimal +values for them depend on your goals and preferences, and are likely to be found +only through experimentation. + +**temperature** + +LLMs are fundamentally token-predicting machines. For a given sequence of tokens +(such as the prompt) an LLM predicts, for each token in its vocabulary, the +likelihood that the token comes next in the sequence. The temperature is a +scaling factor by which these predictions are divided before being normalized to +a probability between 0 and 1. + +Low temperature values—between 0.0 and 1.0—amplify the difference in +likelihoods between tokens, with the result that the model will be even less +likely to produce a token it already evaluated to be unlikely. This is often +perceived as output that is less creative. Although 0.0 is technically not a +valid value, many models treat it as indicating that the model should behave +deterministically, and to only consider the single most likely token. + +High temperature values—those greater than 1.0—compress the +differences in likelihoods between tokens, with the result that the model +becomes more likely to produce tokens it had previously evaluated to be +unlikely. This is often perceived as output that is more creative. Some model +APIs impose a maximum temperature, often 2.0. + +**topP** + +_Top-p_ is a value between 0.0 and 1.0 that controls the number of possible +tokens you want the model to consider, by specifying the cumulative probability +of the tokens. For example, a value of 1.0 means to consider every possible +token (but still take into account the probability of each token). A value of +0.4 means to only consider the most likely tokens, whose probabilities add up to +0.4, and to exclude the remaining tokens from consideration. + +**topK** + +_Top-k_ is an integer value that also controls the number of possible tokens you +want the model to consider, but this time by explicitly specifying the maximum +number of tokens. Specifying a value of 1 means that the model should behave +deterministically. + +#### Experiment with model parameters + +You can experiment with the effect of these parameters on the output generated +by different model and prompt combinations by using the Developer UI. Start the +developer UI with the `genkit start` command and it will automatically load all +of the models defined by the plugins configured in your project. You can quickly +try different prompts and configuration values without having to repeatedly make +these changes in code. + +### Structured output + + + +When using generative AI as a component in your application, you often want +output in a format other than plain text. Even if you're just generating content +to display to the user, you can benefit from structured output simply for the +purpose of presenting it more attractively to the user. But for more advanced +applications of generative AI, such as programmatic use of the model's output, +or feeding the output of one model into another, structured output is a must. 

In Genkit, you can request structured output from a model by specifying a schema
when you call `generate()`:

<Tabs syncKey="language">
  <TabItem label="JavaScript">
    ```ts
    import { z } from 'genkit';

    const MenuItemSchema = z.object({
      name: z.string().describe('The name of the menu item.'),
      description: z.string().describe('A description of the menu item.'),
      calories: z.number().describe('The estimated number of calories.'),
      allergens: z.array(z.string()).describe('Any known allergens in the menu item.'),
    });

    const response = await ai.generate({
      prompt: 'Suggest a menu item for a pirate-themed restaurant.',
      output: { schema: MenuItemSchema },
    });
    ```

    Model output schemas are specified using the [Zod](https://zod.dev/)
    library. In addition to a schema definition language, Zod also provides runtime
    type checking, which bridges the gap between static TypeScript types and the
    unpredictable output of generative AI models.
  </TabItem>
  <TabItem label="Go">
    ```go
    type MenuItem struct {
      Name        string   `json:"name"`
      Description string   `json:"description"`
      Calories    int      `json:"calories"`
      Allergens   []string `json:"allergens"`
    }

    resp, err := genkit.Generate(ctx, g,
      ai.WithPrompt("Invent a menu item for a pirate themed restaurant."),
      ai.WithOutputType(MenuItem{}),
    )
    if err != nil {
      log.Fatal(err) // One possible error is that the response does not conform to the type.
    }
    ```

    Model output types are specified as JSON schema using the
    [`invopop/jsonschema`](https://github.com/invopop/jsonschema) package. This
    provides runtime type checking, which bridges the gap between static Go types
    and the unpredictable output of generative AI models.
  </TabItem>
  <TabItem label="Python">
    ```python
    from pydantic import BaseModel

    class MenuItemSchema(BaseModel):
        name: str
        description: str
        calories: int
        allergens: list[str]

    result = await ai.generate(
        prompt='Invent a menu item for a pirate themed restaurant.',
        output_schema=MenuItemSchema,
    )
    ```

    Model output schemas are specified using [Pydantic Models](https://docs.pydantic.dev/latest/concepts/models/). In addition to a schema definition language, Pydantic also provides runtime
    type checking, which bridges the gap between static Python types and the
    unpredictable output of generative AI models.
  </TabItem>
</Tabs>

When you specify a schema in `generate()`, Genkit does several things behind the
scenes:

- Augments the prompt with additional guidance about the desired output format.
  This also has the side effect of specifying to the model what content exactly
  you want to generate (for example, not only suggest a menu item but also
  generate a description, a list of allergens, and so on).
- Parses the model output into a structured object.
- Verifies that the output conforms with the schema.

To get structured output from a successful generate call, use the response
object's `output` property:

<Tabs syncKey="language">
  <TabItem label="JavaScript">
    ```ts
    const menuItem = response.output; // Typed as z.infer<typeof MenuItemSchema>
    console.log(menuItem?.name);
    ```

    Note that the `output` property can be `null`. This can
    happen when the model fails to generate output that conforms to the schema.
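
    For example, a minimal guard before using the output (a sketch that reuses
    the `MenuItemSchema` example above):

    ```ts
    const menuItem = response.output;
    if (menuItem == null) {
      // The model's response did not conform to MenuItemSchema.
      // See "Handling errors" below for mitigation strategies.
      throw new Error('Failed to generate a valid menu item.');
    }
    console.log(menuItem.name); // Safe to use after the null check.
    ```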

    ```go
    var item MenuItem
    if err := resp.Output(&item); err != nil {
      log.Fatal(err)
    }

    log.Printf("%s (%d calories, %d allergens): %s\n",
      item.Name, item.Calories, len(item.Allergens), item.Description)
    ```

    Alternatively, you can use `genkit.GenerateData()` for a more succinct call:

    ```go
    item, resp, err := genkit.GenerateData[MenuItem](ctx, g,
      ai.WithPrompt("Invent a menu item for a pirate themed restaurant."),
    )
    if err != nil {
      log.Fatal(err)
    }
    ```

    ```python
    output = response.output
    ```

#### Handling errors

The best strategy for dealing with schema validation errors will depend on your
exact use case, but here are some general hints:

- **Try a different model**. For structured output to succeed, the model must be
  capable of generating output in JSON. The most powerful LLMs, like Gemini and
  Claude, are versatile enough to do this; however, smaller models, such as some
  of the local models you would use with Ollama, might not be able to generate
  structured output reliably unless they have been specifically trained to do
  so.

- **Make use of coercion abilities**. You can specify in your schemas that the
  validation library should try to coerce non-conforming types into the type
  specified by the schema. If your schema includes primitive types other than
  strings, using coercion can reduce the number of `generate()` failures you
  experience.

- **Retry the `generate()` call**. If the model you've chosen only rarely fails
  to generate conformant output, you can treat the error as you would treat a
  network error, and simply retry the request using some kind of incremental
  back-off strategy.

### Streaming

When generating large amounts of text, you can improve the experience for your
users by presenting the output as it's generated—streaming the output. A
familiar example of streaming in action can be seen in most LLM chat apps: users
can read the model's response to their message as it's being generated, which
improves the perceived responsiveness of the application and enhances the
illusion of chatting with an intelligent counterpart.

In Genkit, you can stream output using the streaming methods:

    ```ts
    const { stream, response } = ai.generateStream({
      prompt: 'Tell a story.',
    });

    // Stream text chunks
    for await (const chunk of stream) {
      console.log(chunk.text);
    }

    // Get final complete response
    const finalResponse = await response;
    console.log(finalResponse.text);
    ```

    Streaming also works with structured output:

    ```ts
    const { stream, response } = ai.generateStream({
      prompt: 'Suggest three pirate-themed menu items.',
      output: { schema: z.array(MenuItemSchema) },
    });

    for await (const chunk of stream) {
      console.log(chunk.output); // Accumulated output so far
    }

    const finalResponse = await response;
    console.log(finalResponse.output);
    ```

    ```go
    resp, err := genkit.Generate(ctx, g,
      ai.WithPrompt("Suggest a complete menu for a pirate themed restaurant."),
      ai.WithStreaming(func(ctx context.Context, chunk *ai.ModelResponseChunk) error {
        // Do something with the chunk...
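        // chunk.Text() returns only the text produced for this chunk, not the accumulated response.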
+ log.Println(chunk.Text()) + return nil + }), + ) + if err != nil { + log.Fatal(err) + } + + log.Println(resp.Text()) + ``` + + + ```python + stream, response = ai.generate_stream( + prompt='Suggest a complete menu for a pirate themed restaurant.', + ) + + # Stream text chunks + async for chunk in stream: + print(chunk.text) + + # Get complete output + complete_text = (await response).text + ``` + + Streaming also works with structured output: + + ```python + stream, response = ai.generate_stream( + prompt='Suggest three pirate-themed menu items.', + output_schema=MenuSchema, + ) + + async for chunk in stream: + print(chunk.output) # Accumulated output so far + + print((await response).output) + ``` + + + +Streaming structured output works a little differently from streaming text: the +`output` property of a response chunk is an object constructed from the +accumulation of the chunks that have been produced so far, rather than an object +representing a single chunk (which might not be valid on its own). **Every chunk +of structured output in a sense supersedes the chunk that came before it**. + +### Multimodal input + + + +The examples you've seen so far have used text strings as model prompts. While +this remains the most common way to prompt generative AI models, many models can +also accept other media as prompts. Media prompts are most often used in +conjunction with text prompts that instruct the model to perform some operation +on the media, such as to caption an image or transcribe an audio recording. + +The ability to accept media input and the types of media you can use are +completely dependent on the model and its API. For example, the Gemini 1.5 +series of models can accept images, video, and audio as prompts. + +To provide a media prompt to a model that supports it, instead of passing a +simple text prompt to `generate`, pass an array consisting of a media part and a +text part: + + + + ```ts + const response = await ai.generate({ + prompt: [{ media: { url: 'https://.../image.jpg' } }, { text: 'What is in this image?' }], + }); + ``` + + You can also pass media data directly by encoding it as a data URL: + + ```ts + import { readFile } from 'node:fs/promises'; + + const data = await readFile('image.jpg'); + const response = await ai.generate({ + prompt: [{ media: { url: `data:image/jpeg;base64,${data.toString('base64')}` } }, { text: 'What is in this image?' 
    }],
    });
    ```

    ```go
    resp, err := genkit.Generate(ctx, g,
      ai.WithModelName("googleai/gemini-2.5-flash"),
      ai.WithMessages(
        ai.NewUserMessage(
          ai.NewMediaPart("image/jpeg", "https://example.com/photo.jpg"),
          ai.NewTextPart("Compose a poem about this image."),
        ),
      ),
    )
    ```

    You can also pass media data directly by encoding it as a data URL:

    ```go
    image, err := os.ReadFile("photo.jpg")
    if err != nil {
      log.Fatal(err)
    }

    resp, err := genkit.Generate(ctx, g,
      ai.WithModelName("googleai/gemini-2.5-flash"),
      ai.WithMessages(
        ai.NewUserMessage(
          ai.NewMediaPart("image/jpeg", "data:image/jpeg;base64," + base64.StdEncoding.EncodeToString(image)),
          ai.NewTextPart("Compose a poem about this image."),
        ),
      ),
    )
    ```

    ```python
    from genkit.ai import Part

    result = await ai.generate(
        prompt=[
            Part(media={'url': 'https://example.com/photo.jpg'}),
            Part(text='Compose a poem about this image.'),
        ],
    )
    ```

    You can also pass media data directly by encoding it as a data URL:

    ```python
    import base64
    from genkit.ai import Part

    # Read image bytes
    with open('image.jpg', 'rb') as f:
        image_bytes = f.read()

    base64_encoded_image = base64.b64encode(image_bytes).decode('utf-8')

    result = await ai.generate(
        prompt=[
            Part(media={'url': f'data:image/jpeg;base64,{base64_encoded_image}'}),
            Part(text='Compose a poem about this image.'),
        ],
    )
    ```

All models that support media input support both data URLs and HTTPS URLs. Some
model plugins add support for other media sources. For example, the Vertex AI
plugin also lets you use Cloud Storage (`gs://`) URLs.

### Generating media

While most examples in this guide focus on generating text with LLMs, Genkit also supports generating other types of media, including **images** and **audio**. Thanks to its unified `generate()` interface, working with media models is just as straightforward as generating text.

:::note
Genkit returns generated media as a **data URL**, a widely supported format for handling binary media in both browsers and Node.js environments.
:::

#### Image generation

To generate an image, you can use models that support image generation. Here's an example using Google AI's image generation capabilities:

    ```ts
    import { googleAI } from '@genkit-ai/googleai';
    import { parseDataUrl } from 'data-urls';
    import { writeFile } from 'node:fs/promises';

    const response = await ai.generate({
      model: googleAI.model('imagen-3.0-generate-002'), // Example image generation model
      prompt: 'An illustration of a dog wearing a space suit, photorealistic',
      output: { format: 'media' },
    });

    const imagePart = response.output;
    if (imagePart?.media?.url) {
      const parsed = parseDataUrl(imagePart.media.url);
      if (parsed) {
        await writeFile('dog.png', parsed.body);
      }
    }
    ```

    ```go
    // Image generation support varies by Go implementation
    // Check the specific plugin documentation for media generation
    ```

    ```python
    # Image generation support varies by Python implementation
    # Check the specific plugin documentation for media generation
    ```

### Next steps

#### Learn more about Genkit

- As an app developer, the primary way you influence the output of generative AI
  models is through prompting. Read [Prompt management](/docs/dotprompt) to learn how
  Genkit helps you develop effective prompts and manage them in your codebase.
+- Although `generate()` is the nucleus of every generative AI powered + application, real-world applications usually require additional work before + and after invoking a generative AI model. To reflect this, Genkit introduces + the concept of _flows_, which are defined like functions but add additional + features such as observability and simplified deployment. To learn more, see + [Defining workflows](/docs/flows). + +#### Advanced LLM use + +- Many of your users will have interacted with large language models for the first time through chatbots. Although LLMs are capable of much more than simulating conversations, it remains a familiar and useful style of interaction. Even when your users will not be interacting directly with the model in this way, the conversational style of prompting is a powerful way to influence the output generated by an AI model. Read [Multi-turn chats](/docs/chat) to learn how to use Genkit as part of an LLM chat implementation. +- One way to enhance the capabilities of LLMs is to prompt them with a list of + ways they can request more information from you, or request you to perform + some action. This is known as _tool calling_ or _function calling_. Models + that are trained to support this capability can respond to a prompt with a + specially-formatted response, which indicates to the calling application that + it should perform some action and send the result back to the LLM along with + the original prompt. Genkit has library functions that automate both the + prompt generation and the call-response loop elements of a tool calling + implementation. See [Tool calling](/docs/tool-calling) to learn more. +- Retrieval-augmented generation (RAG) is a technique used to introduce + domain-specific information into a model's output. This is accomplished by + inserting relevant information into a prompt before passing it on to the + language model. A complete RAG implementation requires you to bring several + technologies together: text embedding generation models, vector databases, and + large language models. See [Retrieval-augmented generation (RAG)](/docs/rag) to + learn how Genkit simplifies the process of coordinating these various + elements. + +#### Testing model output + +As a software engineer, you're used to deterministic systems where the same +input always produces the same output. However, with AI models being +probabilistic, the output can vary based on subtle nuances in the input, the +model's training data, and even randomness deliberately introduced by parameters +like temperature. + +Genkit's evaluators are structured ways to assess the quality of your LLM's +responses, using a variety of strategies. Read more on the +[Evaluation](/docs/evaluation) page. diff --git a/src/content/docs/unified-docs/rag.mdx b/src/content/docs/unified-docs/rag.mdx new file mode 100644 index 00000000..49176150 --- /dev/null +++ b/src/content/docs/unified-docs/rag.mdx @@ -0,0 +1,939 @@ +--- +title: Retrieval-augmented generation (RAG) +description: Learn how Genkit simplifies retrieval-augmented generation (RAG) across JavaScript, Go, and Python by providing abstractions and plugins for indexers, embedders, and retrievers to incorporate external data into LLM responses. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Genkit provides abstractions that help you build retrieval-augmented +generation (RAG) flows, as well as plugins that provide integrations with +related tools. + +## What is RAG? 

Retrieval-augmented generation is a technique used to incorporate external
sources of information into an LLM's responses. It's important to be able to do
so because, while LLMs are typically trained on a broad body of material,
practical use of LLMs often requires specific domain knowledge (for example, you
might want to use an LLM to answer customers' questions about your company's
products).

One solution is to fine-tune the model using more specific data. However, this
can be expensive both in terms of compute cost and in terms of the effort needed
to prepare adequate training data.

In contrast, RAG works by incorporating external data sources into a prompt at
the time it's passed to the model. For example, you could imagine the prompt,
"What is Bart's relationship to Lisa?" might be expanded ("augmented") by
prepending some relevant information, resulting in the prompt, "Homer and
Marge's children are named Bart, Lisa, and Maggie. What is Bart's relationship
to Lisa?"

This approach has several advantages:

- It can be more cost-effective because you don't have to retrain the model.
- You can continuously update your data source and the LLM can immediately
  make use of the updated information.
- You now have the potential to cite references in your LLM's responses.

On the other hand, using RAG naturally means longer prompts, and some LLM API
services charge for each input token you send. Ultimately, you must evaluate the
cost tradeoffs for your applications.

RAG is a very broad area and there are many different techniques used to achieve
the best quality RAG. The core Genkit framework offers the following main
abstractions to help you do RAG:

    - **Indexers**: add documents to an "index"
    - **Embedders**: transform documents into a vector representation
    - **Retrievers**: retrieve documents from an "index", given a query

    - **Embedders**: transform documents into a vector representation
    - **Retrievers**: retrieve documents from an "index", given a query

    - **Embedders**: transform documents into a vector representation
    - **Retrievers**: retrieve documents from an "index", given a query

These definitions are broad on purpose because Genkit is un-opinionated about
what an "index" is or how exactly documents are retrieved from it. Genkit only
provides a `Document` format and everything else is defined by the retriever or
indexer implementation provider.

### Indexers

    The index is responsible for keeping track of your documents in such a way that
    you can quickly retrieve relevant documents given a specific query. This is most
    often accomplished using a vector database, which indexes your documents using
    multidimensional vectors called embeddings. A text embedding (opaquely)
    represents the concepts expressed by a passage of text; these are generated
    using special-purpose ML models. By indexing text using its embedding, a vector
    database is able to cluster conceptually related text and retrieve documents
    related to a novel string of text (the query).

    Before you can retrieve documents for the purpose of generation, you need to
    ingest them into your document index. A typical ingestion flow does the
    following:

    1. Split up large documents into smaller documents so that only relevant
       portions are used to augment your prompts – "chunking". This is necessary
       because many LLMs have a limited context window, making it impractical to
       include entire documents with a prompt.

       Genkit doesn't provide built-in chunking libraries; however, there are open
       source libraries available that are compatible with Genkit.

    2. Generate embeddings for each chunk. Depending on the database you're using,
       you might explicitly do this with an embedding generation model, or you might
       use the embedding generator provided by the database.
    3. Add the text chunk and its index to the database.

    You might run your ingestion flow infrequently or only once if you are working
    with a stable source of data. On the other hand, if you are working with data
    that frequently changes, you might continuously run the ingestion flow (for
    example, in a Cloud Firestore trigger, whenever a document is updated).

    In Go, indexing is typically handled by your chosen vector database or storage solution.
    Genkit provides the abstractions for working with indexed documents, but the actual
    indexing process is implementation-specific to your storage backend.

    Users are expected to add their own functionality to index documents using their
    preferred vector database or storage solution.

    In Python, indexing is outside the scope of Genkit and you should use the
    SDKs/APIs provided by the vector store you are using. Genkit provides the
    abstractions for working with indexed documents through retrievers.

### Embedders

An embedder is a function that takes content (text, images, audio, etc.) and
creates a numeric vector that encodes the semantic meaning of the original
content. As mentioned above, embedders are leveraged as part of the process of
indexing; however, they can also be used independently to create embeddings
without an index.

### Retrievers

A retriever is a concept that encapsulates logic related to any kind of document
retrieval. The most popular retrieval cases typically include retrieval from
vector stores; however, in Genkit a retriever can be any function that returns
data.

To create a retriever, you can use one of the provided implementations or create
your own.

## Supported indexers, retrievers, and embedders

Genkit provides indexer and retriever support through its plugin system.
The +following plugins are officially supported: + + + + **Vector Databases:** + - [Astra DB](/docs/plugins/astra-db) - DataStax Astra DB vector database + - [Chroma DB](/docs/plugins/chroma) vector database + - [Cloud Firestore vector store](/docs/plugins/firebase) + - [Cloud SQL for PostgreSQL](/docs/plugins/cloud-sql-pg) with pgvector extension + - [LanceDB](/docs/plugins/lancedb) open-source vector database + - [Neo4j](/docs/plugins/neo4j) graph database with vector search + - [Pinecone](/docs/plugins/pinecone) cloud vector database + - [Vertex AI Vector Search](/docs/plugins/vertex-ai) + + **Templates:** + - PostgreSQL with [`pgvector`](/docs/templates/pgvector) + + **Embedding Models:** + - Google AI and Vertex AI plugins provide text embedding models + + + **Vector Databases:** + - [Pinecone](/go/docs/plugins/pinecone) cloud vector database + - PostgreSQL with [`pgvector`](/go/docs/plugins/pgvector) + + **Embedding Models:** + - [Google Generative AI](/go/docs/plugins/google-genai) - Text embedding models + + + **Vector Databases:** + - Firestore Vector Store (via Firebase plugin) + - Dev Local Vector Store (for development/testing) + + **Embedding Models:** + - Google GenAI plugin provides text embedding models + + + +## Defining a RAG Flow + +The following examples show how you could ingest a collection of restaurant menu +PDF documents into a vector database and retrieve them for use in a flow that +determines what food items are available. + +### Install dependencies + + + + Install dependencies for processing PDFs: + + ```bash + npm install llm-chunk pdf-parse @genkit-ai/dev-local-vectorstore + + npm install --save-dev @types/pdf-parse + ``` + + + Install dependencies for text processing and PDF parsing: + + ```bash + go get github.com/tmc/langchaingo/textsplitter + go get github.com/ledongthuc/pdf + ``` + + + Install dependencies for your chosen vector store and PDF processing: + + ```bash + pip install genkit[google-genai,firebase] + # Add other dependencies as needed for PDF processing + ``` + + + +### Configure vector store + + + + Add a local vector store to your configuration: + + ```ts + import { devLocalIndexerRef, devLocalVectorstore } from '@genkit-ai/dev-local-vectorstore'; + import { googleAI } from '@genkit-ai/googleai'; + import { z, genkit } from 'genkit'; + + const ai = genkit({ + plugins: [ + // googleAI provides the gemini-embedding-001 embedder + googleAI(), + + // the local vector store requires an embedder to translate from text to vector + devLocalVectorstore([ + { + indexName: 'menuQA', + embedder: googleAI.embedder('gemini-embedding-001'), + }, + ]), + ], + }); + ``` + + + Configure your Genkit instance with embedding support: + + ```go + ctx := context.Background() + + g, err := genkit.Init(ctx, genkit.WithPlugins(&googlegenai.GoogleAI{})) + if err != nil { + log.Fatal(err) + } + + if err = localvec.Init(); err != nil { + log.Fatal(err) + } + ``` + + + Configure your Genkit instance with vector store support: + + ```python + from genkit.ai import Genkit, Document + from genkit.plugins.google_genai import GoogleGenai + from genkit.plugins.firebase.firestore import FirestoreVectorStore, DistanceMeasure + + ai = Genkit( + plugins=[ + GoogleGenai(), + FirestoreVectorStore( + name='my_firestore_retriever', + collection='mycollection', + vector_field='embedding', + content_field='text', + embedder='googleai/text-embedding-004', + distance_measure=DistanceMeasure.EUCLIDEAN, + firestore_client=firestore_client, + ), + ], + ) + ``` + + + +### Define an Indexer + 
+ + + The following example shows how to create an indexer to ingest a collection of + PDF documents and store them in a local vector database. + + #### Create the indexer + + ```ts + export const menuPdfIndexer = devLocalIndexerRef('menuQA'); + ``` + + #### Create chunking config + + This example uses the `llm-chunk` library which provides a simple text splitter + to break up documents into segments that can be vectorized. + + The following definition configures the chunking function to guarantee a + document segment of between 1000 and 2000 characters, broken at the end of a + sentence, with an overlap between chunks of 100 characters. + + ```ts + const chunkingConfig = { + minLength: 1000, + maxLength: 2000, + splitter: 'sentence', + overlap: 100, + delimiters: '', + } as any; + ``` + + More chunking options for this library can be found in the [llm-chunk + documentation](https://www.npmjs.com/package/llm-chunk). + + #### Define your indexer flow + + ```ts + import { Document } from 'genkit/retriever'; + import { chunk } from 'llm-chunk'; + import { readFile } from 'fs/promises'; + import path from 'path'; + import pdf from 'pdf-parse'; + + async function extractTextFromPdf(filePath: string) { + const pdfFile = path.resolve(filePath); + const dataBuffer = await readFile(pdfFile); + const data = await pdf(dataBuffer); + return data.text; + } + + export const indexMenu = ai.defineFlow( + { + name: 'indexMenu', + inputSchema: z.object({ filePath: z.string().describe('PDF file path') }), + outputSchema: z.object({ + success: z.boolean(), + documentsIndexed: z.number(), + error: z.string().optional(), + }), + }, + async ({ filePath }) => { + try { + filePath = path.resolve(filePath); + + // Read the pdf + const pdfTxt = await ai.run('extract-text', () => extractTextFromPdf(filePath)); + + // Divide the pdf text into segments + const chunks = await ai.run('chunk-it', async () => chunk(pdfTxt, chunkingConfig)); + + // Convert chunks of text into documents to store in the index. + const documents = chunks.map((text) => { + return Document.fromText(text, { filePath }); + }); + + // Add documents to the index + await ai.index({ + indexer: menuPdfIndexer, + documents, + }); + + return { + success: true, + documentsIndexed: documents.length, + }; + } catch (err) { + // For unexpected errors that throw exceptions + return { + success: false, + documentsIndexed: 0, + error: err instanceof Error ? err.message : String(err) + }; + } + }, + ); + ``` + + #### Run the indexer flow + + ```bash + genkit flow:run indexMenu '{"filePath": "menu.pdf"}' + ``` + + + #### Create chunking config + + This example uses the `textsplitter` library which provides a simple text + splitter to break up documents into segments that can be vectorized. + + The following definition configures the chunking function to return document + segments of 200 characters, with an overlap between chunks of 20 characters. + + ```go + splitter := textsplitter.NewRecursiveCharacter( + textsplitter.WithChunkSize(200), + textsplitter.WithChunkOverlap(20), + ) + ``` + + More chunking options for this library can be found in the + [`langchaingo` documentation](https://pkg.go.dev/github.com/tmc/langchaingo/textsplitter#Option). + + #### Define your indexer flow + + ```go + genkit.DefineFlow( + g, "indexMenu", + func(ctx context.Context, path string) (any, error) { + // Extract plain text from the PDF. Wrap the logic in Run so it + // appears as a step in your traces. 
+ pdfText, err := genkit.Run(ctx, "extract", func() (string, error) { + return readPDF(path) + }) + if err != nil { + return nil, err + } + + // Split the text into chunks. Wrap the logic in Run so it appears as a + // step in your traces. + docs, err := genkit.Run(ctx, "chunk", func() ([]*ai.Document, error) { + chunks, err := splitter.SplitText(pdfText) + if err != nil { + return nil, err + } + + var docs []*ai.Document + for _, chunk := range chunks { + docs = append(docs, ai.DocumentFromText(chunk, nil)) + } + return docs, nil + }) + if err != nil { + return nil, err + } + + // Add chunks to the index using custom index function + // Implementation depends on your chosen vector database + return map[string]interface{}{ + "success": true, + "documentsIndexed": len(docs), + }, nil + }, + ) + ``` + + ```go + // Helper function to extract plain text from a PDF. Excerpted from + // https://github.com/ledongthuc/pdf + func readPDF(path string) (string, error) { + f, r, err := pdf.Open(path) + if f != nil { + defer f.Close() + } + if err != nil { + return "", err + } + + reader, err := r.GetPlainText() + if err != nil { + return "", err + } + + bytes, err := io.ReadAll(reader) + if err != nil { + return "", err + } + + return string(bytes), nil + } + ``` + + #### Run the indexer flow + + ```bash + genkit flow:run indexMenu '"menu.pdf"' + ``` + + + In Python, indexing is typically handled by your vector store's SDK. Here's an example of how you might structure an indexing flow: + + ```python + @ai.flow() + async def index_menu(file_path: str): + # Extract text from PDF (implementation depends on your PDF library) + pdf_text = extract_text_from_pdf(file_path) + + # Chunk the text (implementation depends on your chunking library) + chunks = chunk_text(pdf_text) + + # Index using your vector store's SDK + # This is specific to your chosen vector database + # For Firestore, you would use the Firestore SDK directly + + return { + "success": True, + "documents_indexed": len(chunks) + } + ``` + + Note: Indexing is outside the scope of Genkit Python and should be done using your vector store's native SDK. + + + +After running the indexing flow, the vector database will be seeded with +documents and ready to be used in Genkit flows with retrieval steps. + +### Define a flow with retrieval + +The following example shows how you might use a retriever in a RAG flow: + + + + ```ts + import { devLocalRetrieverRef } from '@genkit-ai/dev-local-vectorstore'; + import { googleAI } from '@genkit-ai/googleai'; + + // Define the retriever reference + export const menuRetriever = devLocalRetrieverRef('menuQA'); + + export const menuQAFlow = ai.defineFlow( + { + name: 'menuQA', + inputSchema: z.object({ query: z.string() }), + outputSchema: z.object({ answer: z.string() }) + }, + async ({ query }) => { + // retrieve relevant documents + const docs = await ai.retrieve({ + retriever: menuRetriever, + query, + options: { k: 3 }, + }); + + // generate a response + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: ` + You are acting as a helpful AI assistant that can answer + questions about the food available on the menu at Genkit Grub Pub. + + Use only the context provided to answer the question. + If you don't know, do not make up an answer. + Do not add or change items on the menu. 

      Question: ${query}`,
          docs,
        });

        return { answer: text };
      },
    );
    ```

    #### Run the retriever flow

    ```bash
    genkit flow:run menuQA '{"query": "Recommend a dessert from the menu while avoiding dairy and nuts"}'
    ```

    ```go
    _, menuPdfRetriever, err := localvec.DefineRetriever(
      g, "menuQA", localvec.Config{Embedder: googlegenai.Embedder(g, "text-embedding-004")},
    )
    if err != nil {
      log.Fatal(err)
    }

    genkit.DefineFlow(
      g, "menuQA",
      func(ctx context.Context, question string) (string, error) {
        // Retrieve text relevant to the user's question.
        resp, err := ai.Retrieve(ctx, menuPdfRetriever, ai.WithTextDocs(question))
        if err != nil {
          return "", err
        }

        // Call Generate, including the menu information in your prompt.
        return genkit.GenerateText(ctx, g,
          ai.WithModelName("googleai/gemini-2.5-flash"),
          ai.WithDocs(resp.Documents),
          ai.WithSystem(`
    You are acting as a helpful AI assistant that can answer questions about the
    food available on the menu at Genkit Grub Pub.
    Use only the context provided to answer the question. If you don't know, do not
    make up an answer. Do not add or change items on the menu.`),
          ai.WithPrompt(question))
      })
    ```

    #### Run the retriever flow

    ```bash
    genkit flow:run menuQA '"Recommend a dessert from the menu while avoiding dairy and nuts"'
    ```

    ```python
    @ai.flow()
    async def qa_flow(query: str):
        docs = await ai.retrieve(
            query=Document.from_text(query),
            retriever='firestore/my_firestore_retriever'
        )

        response = await ai.generate(
            prompt=f"""
            You are acting as a helpful AI assistant that can answer
            questions about the food available on the menu at Genkit Grub Pub.

            Use only the context provided to answer the question.
            If you don't know, do not make up an answer.
            Do not add or change items on the menu.

            Question: {query}""",
            docs=docs
        )
        return response.text
    ```

    #### Run the retriever flow

    ```python
    result = await qa_flow('Recommend a dessert from the menu while avoiding dairy and nuts')
    print(result)
    ```

The output for this command should contain a response from the model, grounded
in the indexed menu file.

## Write your own retrievers

It's also possible to create your own retriever. This is useful if your
documents are managed in a document store that is not supported in Genkit (e.g.,
MySQL, Google Drive). The Genkit SDK provides flexible methods that let
you provide custom code for fetching documents. You can also define custom
retrievers that build on top of existing retrievers in Genkit and apply advanced
RAG techniques (such as reranking or prompt extensions) on top.

### Simple Retrievers

    Simple retrievers let you easily convert existing code into retrievers:

    ```ts
    import { z } from 'genkit';
    import { searchEmails } from './db';

    ai.defineSimpleRetriever(
      {
        name: 'myDatabase',
        configSchema: z
          .object({
            limit: z.number().optional(),
          })
          .optional(),
        // we'll extract "message" from the returned email item
        content: 'message',
        // and several keys to use as metadata
        metadata: ['from', 'to', 'subject'],
      },
      async (query, config) => {
        const result = await searchEmails(query.text, { limit: config.limit });
        return result.data.emails;
      },
    );
    ```

    ```go
    // Simple retriever example in Go
    // Implementation depends on your specific use case and data source
    ```

    ```python
    from genkit.types import (
        RetrieverRequest,
        RetrieverResponse,
        Document,
        ActionRunContext
    )

    async def my_retriever(request: RetrieverRequest, ctx: ActionRunContext):
        """Example of a simple retriever.

        Args:
            request: The request to the retriever.
            ctx: The context of the retriever.
        """
        # Your custom retrieval logic here
        return RetrieverResponse(documents=[
            Document.from_text('Hello'),
            Document.from_text('World')
        ])

    ai.define_retriever(name='my_retriever', fn=my_retriever)
    ```

### Custom Retrievers

    ```ts
    import { CommonRetrieverOptionsSchema } from 'genkit/retriever';
    import { z } from 'genkit';

    export const menuRetriever = devLocalRetrieverRef('menuQA');

    const advancedMenuRetrieverOptionsSchema = CommonRetrieverOptionsSchema.extend({
      preRerankK: z.number().max(1000),
    });

    const advancedMenuRetriever = ai.defineRetriever(
      {
        name: `custom/advancedMenuRetriever`,
        configSchema: advancedMenuRetrieverOptionsSchema,
      },
      async (input, options) => {
        const extendedPrompt = await extendPrompt(input);
        const docs = await ai.retrieve({
          retriever: menuRetriever,
          query: extendedPrompt,
          options: { k: options.preRerankK || 10 },
        });
        const rerankedDocs = await rerank(docs);
        return rerankedDocs.slice(0, options.k || 3);
      },
    );
    ```

    (`extendPrompt` and `rerank` are functions you would have to implement
    yourself; they are not provided by the framework.)

    And then you can just swap out your retriever:

    ```ts
    const docs = await ai.retrieve({
      retriever: advancedMenuRetriever,
      query: input,
      options: { preRerankK: 7, k: 3 },
    });
    ```

    For example, suppose you have a custom re-ranking function you want to use. The
    following example defines a custom retriever that applies your function to the
    menu retriever defined earlier:

    ```go
    type CustomMenuRetrieverOptions struct {
      K int
      PreRerankK int
    }

    advancedMenuRetriever := genkit.DefineRetriever(
      g, "custom", "advancedMenuRetriever",
      func(ctx context.Context, req *ai.RetrieverRequest) (*ai.RetrieverResponse, error) {
        // Handle options passed using our custom type.
        opts, _ := req.Options.(CustomMenuRetrieverOptions)
        // Set fields to default values when either the field was undefined
        // or when req.Options is not a CustomMenuRetrieverOptions.
        if opts.K == 0 {
          opts.K = 3
        }
        if opts.PreRerankK == 0 {
          opts.PreRerankK = 10
        }

        // Call the retriever as in the simple case.
        resp, err := ai.Retrieve(ctx, menuPdfRetriever,
          ai.WithDocs(req.Query),
          ai.WithConfig(localvec.RetrieverOptions{K: opts.PreRerankK}),
        )
        if err != nil {
          return nil, err
        }

        // Re-rank the returned documents using your custom function.
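        // (rerank is a scoring function you implement yourself; it is not provided by Genkit.)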
+ rerankedDocs := rerank(resp.Documents) + resp.Documents = rerankedDocs[:opts.K] + + return resp, nil + }, + ) + ``` + + + ```python + async def advanced_retriever(request: RetrieverRequest, ctx: ActionRunContext): + """Example of an advanced retriever with custom logic.""" + + # First, get initial results from base retriever + initial_docs = await ai.retrieve( + query=request.query, + retriever='my_base_retriever' + ) + + # Apply custom reranking or filtering logic + reranked_docs = custom_rerank_function(initial_docs, request.query) + + # Return top K results + k = getattr(request.options, 'k', 3) + return RetrieverResponse(documents=reranked_docs[:k]) + + ai.define_retriever(name='advanced_retriever', fn=advanced_retriever) + ``` + + Then you can use your custom retriever: + + ```python + docs = await ai.retrieve( + query=Document.from_text(query), + retriever='advanced_retriever' + ) + ``` + + + +## Rerankers and Two-Stage Retrieval + + + + A reranking model — also known as a cross-encoder — is a type of model that, + given a query and document, will output a similarity score. We use this score to + reorder the documents by relevance to our query. Reranker APIs take a list of + documents (for example the output of a retriever) and reorders the documents + based on their relevance to the query. This step can be useful for fine-tuning + the results and ensuring that the most pertinent information is used in the + prompt provided to a generative model. + + #### Reranker Example + + A reranker in Genkit is defined in a similar syntax to retrievers and indexers. + Here is an example using a reranker in Genkit. This flow reranks a set of + documents based on their relevance to the provided query using a predefined + Vertex AI reranker. + + ```ts + const FAKE_DOCUMENT_CONTENT = [ + 'pythagorean theorem', + 'e=mc^2', + 'pi', + 'dinosaurs', + 'quantum mechanics', + 'pizza', + 'harry potter', + ]; + + export const rerankFlow = ai.defineFlow( + { + name: 'rerankFlow', + inputSchema: z.object({ query: z.string() }), + outputSchema: z.array( + z.object({ + text: z.string(), + score: z.number(), + }), + ), + }, + async ({ query }) => { + const documents = FAKE_DOCUMENT_CONTENT.map((text) => ({ content: text })); + + const rerankedDocuments = await ai.rerank({ + reranker: 'vertexai/semantic-ranker-512', + query: { content: query }, + documents, + }); + + return rerankedDocuments.map((doc) => ({ + text: doc.content, + score: doc.metadata.score, + })); + }, + ); + ``` + + This reranker uses the Vertex AI genkit plugin with `semantic-ranker-512` to + score and rank documents. The higher the score, the more relevant the document + is to the query. + + #### Custom Rerankers + + You can also define custom rerankers to suit your specific use case. This is + helpful when you need to rerank documents using your own custom logic or a + custom model. 
    Here's a simple example of defining a custom reranker:

    ```ts
    export const customReranker = ai.defineReranker(
      {
        name: 'custom/reranker',
        configSchema: z.object({
          k: z.number().optional(),
        }),
      },
      async (query, documents, options) => {
        // Your custom reranking logic here
        const rerankedDocs = documents.map((doc) => {
          const score = Math.random(); // Assign random scores for demonstration
          return {
            ...doc,
            metadata: { ...doc.metadata, score },
          };
        });

        return rerankedDocs.sort((a, b) => b.metadata.score - a.metadata.score).slice(0, options.k || 3);
      },
    );
    ```

    Once defined, this custom reranker can be used just like any other reranker in
    your RAG flows, giving you flexibility to implement advanced reranking
    strategies.

    Reranking functionality in Go can be implemented as part of custom retrievers.
    You can apply reranking logic within your custom retriever implementations.

    Reranking functionality in Python can be implemented as part of custom retrievers.
    You can apply reranking logic within your custom retriever implementations.

## Next steps

- Learn about [tool calling](/unified-docs/tool-calling) to give your RAG system access to external APIs and functions
- Explore [multi-agent systems](/docs/multi-agent) for coordinating multiple AI agents with RAG capabilities
- Check out the vector database plugins for production-ready RAG implementations
- See the [evaluation guide](/docs/evaluation) for testing and improving your RAG system's performance
diff --git a/src/content/docs/unified-docs/tool-calling.mdx b/src/content/docs/unified-docs/tool-calling.mdx
new file mode 100644
index 00000000..09cdb866
--- /dev/null
+++ b/src/content/docs/unified-docs/tool-calling.mdx
@@ -0,0 +1,794 @@
---
title: Tool calling
description: Learn how to enable LLMs to interact with external applications and data using Genkit's tool calling feature across JavaScript, Go, and Python, covering tool definition, usage, and advanced scenarios.
---

import ExampleLink from '@/components/ExampleLink.astro';
import { Tabs, TabItem } from '@astrojs/starlight/components';

_Tool calling_, also known as _function calling_, is a structured way to give
an LLM the ability to make requests back to the application that called it. You
define the tools you want to make available to the model, and the model will
make tool requests to your app as necessary to fulfill the prompts you give it.

The use cases of tool calling generally fall into a few themes:

**Giving an LLM access to information it wasn't trained with**

- Frequently changing information, such as a stock price or the current
  weather.
- Information specific to your app domain, such as product information or user
  profiles.

Note the overlap with [retrieval-augmented generation](/unified-docs/rag) (RAG), which is also
a way to let an LLM integrate factual information into its generations. RAG is a
heavier solution that is most suited when you have a large amount of information
or the information that's most relevant to a prompt is ambiguous. On the other
hand, if retrieving the information the LLM needs is a simple function call or
database lookup, tool calling is more appropriate.

**Introducing a degree of determinism into an LLM workflow**

- Performing calculations that the LLM cannot reliably complete itself.
- Forcing an LLM to generate verbatim text under certain circumstances, such
  as when responding to a question about an app's terms of service.

**Performing an action when initiated by an LLM**

- Turning lights on and off in an LLM-powered home assistant
- Making table reservations in an LLM-powered restaurant agent

## Before you begin

If you want to run the code examples on this page, first complete the steps in
the Getting started guide for your language. All of the examples assume that you
have already set up a project with Genkit dependencies installed.

    Complete the [Getting started](/docs/get-started) guide.

    Complete the [Get started](/go/docs/get-started-go) guide.

    Complete the [Get started](/python/docs/get-started) guide.

This page discusses one of the advanced features of Genkit model abstraction, so
before you dive too deeply, you should be familiar with the content on the
[Generating content with AI models](/unified-docs/generating-content) page. You should also be familiar
with Genkit's system for defining input and output schemas, which is discussed
on the [Defining AI workflows](/unified-docs/creating-flows) page.

## Overview of tool calling

At a high level, this is what a typical tool-calling interaction with an LLM
looks like:

1. The calling application prompts the LLM with a request and also includes in
   the prompt a list of tools the LLM can use to generate a response.
2. The LLM either generates a complete response or generates a tool call request
   in a specific format.
3. If the caller receives a complete response, the request is fulfilled and the
   interaction ends; but if the caller receives a tool call, it performs
   whatever logic is appropriate and sends a new request to the LLM containing
   the original prompt or some variation of it as well as the result of the tool
   call.
4. The LLM handles the new prompt as in Step 2.

For this to work, several requirements must be met:

- The model must be trained to make tool requests when a tool is needed to
  complete a prompt. Most of the larger models provided through web APIs, such
  as Gemini and Claude, can do this, but smaller and more specialized models
  often cannot. Genkit will throw an error if you try to provide tools to a
  model that doesn't support it.
- The calling application must provide tool definitions to the model in the
  format it expects.
- The calling application must prompt the model to generate tool calling
  requests in the format the application expects.

## Tool calling with Genkit

Genkit provides a single interface for tool calling with models that support it.
Each model plugin ensures that the last two of the above criteria are met, and
the Genkit instance's `generate()` function automatically carries out the tool
calling loop described earlier.

### Model support

Tool calling support depends on the model, the model API, and the Genkit plugin.
Consult the relevant documentation to determine if tool calling is likely to be
supported. In addition:

- Genkit will throw an error if you try to provide tools to a model that
  doesn't support it.
- If the plugin exports model references, the model info will indicate if it supports tool calling.

    Check the `info.supports.tools` property on model references:

    ```ts
    import { googleAI } from '@genkit-ai/googleai';

    const model = googleAI.model('gemini-2.5-flash');
    console.log(model.info.supports.tools); // true/false
    ```

    Check the `ModelInfo.Supports.Tools` property:

    ```go
    // Model support information is available through the plugin
    // Check plugin documentation for specific model capabilities
    ```

    Check the `info.supports.tools` property:

    ```python
    # Model support information is available through the plugin
    # Check plugin documentation for specific model capabilities
    ```

### Defining tools

Use the appropriate method for your language to define tools:

    Use the Genkit instance's `defineTool()` function:

    ```ts
    import { genkit, z } from 'genkit';
    import { googleAI } from '@genkit-ai/googleai';

    const ai = genkit({
      plugins: [googleAI()],
      model: googleAI.model('gemini-2.5-flash'),
    });

    const getWeather = ai.defineTool(
      {
        name: 'getWeather',
        description: 'Gets the current weather in a given location',
        inputSchema: z.object({
          location: z.string().describe('The location to get the current weather for'),
        }),
        outputSchema: z.string(),
      },
      async (input) => {
        // Here, we would typically make an API call or database query. For this
        // example, we just return a fixed value.
        return `The current weather in ${input.location} is 63°F and sunny.`;
      },
    );
    ```

    The syntax here looks just like the `defineFlow()` syntax; however, `name`,
    `description`, and `inputSchema` parameters are required. When writing a tool
    definition, take special care with the wording and descriptiveness of these
    parameters. They are vital for the LLM to make effective use of the
    available tools.

    Use the `genkit.DefineTool()` function:

    ```go
    package main

    import (
      "context"
      "fmt"
      "log"

      "github.com/firebase/genkit/go/genkit"
      "github.com/firebase/genkit/go/plugins/googlegenai"
    )

    // Define the input structure for the tool
    type WeatherInput struct {
      Location string `json:"location" jsonschema_description:"Location to get weather for"`
    }

    func main() {
      ctx := context.Background()

      g, err := genkit.Init(ctx,
        genkit.WithPlugins(&googlegenai.GoogleAI{}),
        genkit.WithDefaultModel("googleai/gemini-2.5-flash"),
      )
      if err != nil {
        log.Fatalf("Genkit initialization failed: %v", err)
      }

      getWeatherTool := genkit.DefineTool(
        g, "getWeather", "Gets the current weather in a given location",
        func(ctx context.Context, input WeatherInput) (string, error) {
          // Here, we would typically make an API call or database query. For this
          // example, we just return a fixed value.
          log.Printf("Tool 'getWeather' called for location: %s", input.Location)
          return fmt.Sprintf("The current weather in %s is 63°F and sunny.", input.Location), nil
        })

      // Pass getWeatherTool to Generate calls (see "Using tools" below).
      _ = getWeatherTool
    }
    ```

    The syntax here looks just like the `genkit.DefineFlow()` syntax; however, you
    must write a description. Take special care with the wording and descriptiveness
    of the description; it is vital for the LLM to decide when to use the tool
    appropriately.

    Use the Genkit instance's `tool()` decorator:

    ```python
    from pydantic import BaseModel, Field
    from genkit.ai import Genkit
    from genkit.plugins.google_genai import GoogleGenai

    ai = Genkit(
        plugins=[GoogleGenai()],
        model='googleai/gemini-2.5-flash',
    )

    class WeatherInput(BaseModel):
        location: str = Field(description='The location to get the current weather for')

    @ai.tool()
    def get_weather(input: WeatherInput) -> str:
        """Gets the current weather in a given location"""
        # Replace with actual weather fetching logic
        return f'The current weather in {input.location} is 63°F and sunny.'
    ```

    The syntax here looks just like the `flow()` syntax; however, a description is
    required (here, the function's docstring serves as the tool's description).
    When writing a tool definition, take special care with the wording and
    descriptiveness of the name and description. They are vital for the LLM to
    make effective use of the available tools.

### Using tools

Include defined tools in your prompts to generate content:

    **Using `generate()`:**

    ```ts
    const response = await ai.generate({
      prompt: "What is the weather in Baltimore?",
      tools: [getWeather],
    });
    ```

    **Using `definePrompt()`:**

    ```ts
    const weatherPrompt = ai.definePrompt(
      {
        name: "weatherPrompt",
        tools: [getWeather],
      },
      "What is the weather in {{location}}?"
    );

    const response = await weatherPrompt({ location: "Baltimore" });
    ```

    **Using Prompt files:**

    ```dotprompt
    ---
    tools: [getWeather]
    input:
      schema:
        location: string
    ---

    What is the weather in {{location}}?
    ```

    Then you can execute the prompt in your code as follows:

    ```ts
    // assuming prompt file is named weatherPrompt.prompt
    const weatherPrompt = ai.prompt("weatherPrompt");

    const response = await weatherPrompt({ location: "Baltimore" });
    ```

    **Using Chat:**

    ```ts
    const chat = ai.chat({
      system: "Answer questions using the tools you have.",
      tools: [getWeather],
    });

    const response = await chat.send("What is the weather in Baltimore?");

    // Or, specify tools that are message-specific
    const followUp = await chat.send({
      prompt: "What is the weather in Baltimore?",
      tools: [getWeather],
    });
    ```

    **Using `genkit.Generate()`:**

    ```go
    resp, err := genkit.Generate(ctx, g,
      ai.WithPrompt("What is the weather in San Francisco?"),
      ai.WithTools(getWeatherTool),
    )
    ```

    **Using `genkit.DefinePrompt()`:**

    ```go
    weatherPrompt, err := genkit.DefinePrompt(g, "weatherPrompt",
      ai.WithPrompt("What is the weather in {{location}}?"),
      ai.WithTools(getWeatherTool),
    )
    if err != nil {
      log.Fatal(err)
    }

    resp, err := weatherPrompt.Execute(ctx,
      ai.WithInput(map[string]any{"location": "San Francisco"}),
    )
    ```

    **Using a `.prompt` file:**

    Create a file named `prompts/weatherPrompt.prompt`:

    ```dotprompt
    ---
    system: "Answer questions using the tools you have."
    tools: [getWeather]
    input:
      schema:
        location: string
    ---

    What is the weather in {{location}}?
    ```

    Then execute it in your Go code:

    ```go
    // Assuming prompt file named weatherPrompt.prompt exists in ./prompts dir.
+ weatherPrompt := genkit.LookupPrompt("weatherPrompt") + if weatherPrompt == nil { + log.Fatal("no prompt named 'weatherPrompt' found") + } + + resp, err := weatherPrompt.Execute(ctx, + ai.WithInput(map[string]any{"location": "San Francisco"}), + ) + ``` + + + **Using `generate()`:** + + ```python + result = await ai.generate( + prompt='What is the weather in Baltimore?', + tools=['get_weather'], + ) + ``` + + **Using flows with tools:** + + ```python + @ai.flow() + async def weather_flow(location: str): + result = await ai.generate( + prompt=f'What is the weather in {location}?', + tools=['get_weather'], + ) + return result.text + ``` + + + +Genkit will automatically handle the tool call if the LLM needs to use the tool to answer the prompt. + +### Streaming and Tool Calling + +When combining tool calling with streaming responses, you will receive `toolRequest` and `toolResponse` content parts in the chunks of the stream: + + + + ```ts + const { stream } = ai.generateStream({ + prompt: "What is the weather in Baltimore?", + tools: [getWeather], + }); + + for await (const chunk of stream) { + console.log(chunk); + } + ``` + + This might produce a sequence of chunks similar to: + + ```ts + {index: 0, role: "model", content: [{text: "Okay, I'll check the weather"}]} + {index: 0, role: "model", content: [{text: "for Baltimore."}]} + // toolRequests will be emitted as a single chunk by most models + {index: 0, role: "model", content: [{toolRequest: {name: "getWeather", input: {location: "Baltimore"}}}]} + // when streaming multiple messages, Genkit increments the index and indicates the new role + {index: 1, role: "tool", content: [{toolResponse: {name: "getWeather", output: "Temperature: 68 degrees\nStatus: Cloudy."}}]} + {index: 2, role: "model", content: [{text: "The weather in Baltimore is 68 degrees and cloudy."}]} + ``` + + You can use these chunks to dynamically construct the full generated message sequence. + + + ```go + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("What is the weather in San Francisco?"), + ai.WithTools(getWeatherTool), + ai.WithStreaming(func(ctx context.Context, chunk *ai.ModelResponseChunk) error { + // Handle streaming chunks here + log.Println("Chunk:", chunk.Text()) + return nil + }), + ) + ``` + + + ```python + stream, response = ai.generate_stream( + prompt='What is the weather in Baltimore?', + tools=['get_weather'], + ) + + async for chunk in stream: + print(chunk) + ``` + + + +### Limiting Tool Call Iterations with `maxTurns` + +When working with tools that might trigger multiple sequential calls, you can control resource usage and prevent runaway execution using the `maxTurns` parameter. This sets a hard limit on how many back-and-forth interactions the model can have with your tools in a single generation cycle. + +**Why use maxTurns?** +- **Cost Control**: Prevents unexpected API usage charges from excessive tool calls +- **Performance**: Ensures responses complete within reasonable timeframes +- **Safety**: Guards against infinite loops in complex tool interactions +- **Predictability**: Makes your application behavior more deterministic + +The default value is 5 turns, which works well for most scenarios. Each "turn" represents one complete cycle where the model can make tool calls and receive responses. 
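
As a concrete illustration, a single `generate()` call might consume its turns
as follows (the tool names here are hypothetical and not defined in this guide):

```ts
const response = await ai.generate({
  prompt: 'Plan a three-city trip and estimate the total cost in USD.',
  tools: [searchFlights, convertCurrency], // hypothetical tools defined elsewhere
  maxTurns: 3,
});
// Turn 1: model requests searchFlights(...); Genkit runs it and returns the result.
// Turn 2: model requests searchFlights(...) again for the next leg.
// Turn 3: model requests convertCurrency(...); the limit is now reached, so
// Genkit stops the loop and returns the best answer the model can give.
```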

    **Example: Web Research Agent**

    Consider a research agent that might need to search multiple times to find comprehensive information:

    ```ts
    const webSearch = ai.defineTool(
      {
        name: 'webSearch',
        description: 'Search the web for current information',
        inputSchema: z.object({
          query: z.string().describe('Search query'),
        }),
        outputSchema: z.string(),
      },
      async (input) => {
        // Simulate web search API call
        return `Search results for "${input.query}": [relevant information here]`;
      },
    );

    const response = await ai.generate({
      prompt: 'Research the latest developments in quantum computing, including recent breakthroughs, key companies, and future applications.',
      tools: [webSearch],
      maxTurns: 8, // Allow up to 8 research iterations
    });
    ```

    **Example: Financial Calculator**

    ```ts
    const calculator = ai.defineTool(
      {
        name: 'calculator',
        description: 'Perform mathematical calculations',
        inputSchema: z.object({
          expression: z.string().describe('Mathematical expression to evaluate'),
        }),
        outputSchema: z.number(),
      },
      async (input) => {
        // Demo only: eval is unsafe for untrusted input.
        return eval(input.expression); // In production, use a safe math parser
      },
    );

    const response = await ai.generate({
      prompt: 'Calculate the total value of my portfolio: 100 shares of AAPL, 50 shares of GOOGL, and 200 shares of MSFT. Also calculate what percentage each holding represents.',
      // stockAnalyzer is another tool, assumed to be defined elsewhere.
      tools: [calculator, stockAnalyzer],
      maxTurns: 12, // Multiple stock lookups + calculations needed
    });
    ```

    ```go
    resp, err := genkit.Generate(ctx, g,
      ai.WithPrompt("Research the latest developments in quantum computing"),
      ai.WithTools(webSearchTool),
      ai.WithMaxTurns(8), // Allow up to 8 research iterations
    )
    ```

    ```python
    result = await ai.generate(
        prompt='Research the latest developments in quantum computing',
        tools=['web_search'],
        max_turns=8,  # Allow up to 8 research iterations
    )
    ```

**What happens when maxTurns is reached?**

When the limit is hit, Genkit stops the tool-calling loop and returns the model's current response, even if it was in the middle of using tools. The model will typically provide a partial answer or explain that it couldn't complete all the requested operations.

### Dynamically defining tools at runtime

    As with most things in Genkit, tools need to be predefined during your app's
    initialization. This is necessary so that you can interact with your tools
    from the Genkit Dev UI, and it is typically the recommended approach. However,
    there are scenarios where a tool must be defined dynamically, per user request.

    You can define tools dynamically using the `ai.dynamicTool` function. It is
    very similar to the `ai.defineTool` method; however, dynamic tools are not
    tracked by the Genkit runtime, so they cannot be interacted with from the
    Genkit Dev UI and must be passed to the `ai.generate` call by reference (for
    regular tools you can also use a string tool name).
+
+    ```ts
+    import { genkit, z } from 'genkit';
+    import { googleAI } from '@genkit-ai/googleai';
+
+    const ai = genkit({
+      plugins: [googleAI()],
+      model: googleAI.model('gemini-2.5-flash'),
+    });
+
+    ai.defineFlow('weatherFlow', async () => {
+      const getWeather = ai.dynamicTool(
+        {
+          name: 'getWeather',
+          description: 'Gets the current weather in a given location',
+          inputSchema: z.object({
+            location: z.string().describe('The location to get the current weather for'),
+          }),
+          outputSchema: z.string(),
+        },
+        async (input) => {
+          return `The current weather in ${input.location} is 63°F and sunny.`;
+        },
+      );
+
+      const { text } = await ai.generate({
+        prompt: 'What is the weather in Baltimore?',
+        tools: [getWeather],
+      });
+
+      return text;
+    });
+    ```
+
+    When defining dynamic tools, you can specify input and output schemas either
+    with Zod, as shown in the previous example, or by passing in manually
+    constructed JSON Schema:
+
+    ```ts
+    const getWeather = ai.dynamicTool(
+      {
+        name: 'getWeather',
+        description: 'Gets the current weather in a given location',
+        inputJsonSchema: myInputJsonSchema,
+        outputJsonSchema: myOutputJsonSchema,
+      },
+      async (input) => {
+        /* ... */
+      },
+    );
+    ```
+
+    Dynamic tools don't require an implementation function. If you don't pass one
+    in, the tool behaves like an [interrupt](/docs/interrupts), and you can handle
+    the tool call manually:
+
+    ```ts
+    const getWeather = ai.dynamicTool({
+      name: 'getWeather',
+      description: 'Gets the current weather in a given location',
+      inputJsonSchema: myInputJsonSchema,
+      outputJsonSchema: myOutputJsonSchema,
+    });
+    ```
+  </TabItem>
+  <TabItem label="Go">
+    Dynamic tool definition is not covered in this guide for Go; see the Genkit Go
+    API documentation for details.
+  </TabItem>
+  <TabItem label="Python">
+    Dynamic tool definition is not covered in this guide for Python; see the Genkit
+    Python API documentation for details.
+  </TabItem>
+</Tabs>
+
+### Pause the tool loop by using interrupts
+
+By default, Genkit repeatedly calls the LLM until every tool call has been
+resolved. You can conditionally pause execution in situations where you want
+to, for example:
+
+- Ask the user a question or display UI.
+- Confirm a potentially risky action with the user.
+- Request out-of-band approval for an action.
+
+**Interrupts** are special tools that can halt the loop and return control
+to your code so that you can handle more advanced scenarios. Visit the
+[interrupts guide](/docs/interrupts) to learn how to use them.
+
+### Explicitly handling tool calls
+
+If you want full control over this tool-calling loop, for example to
+apply more complicated logic, you can handle tool calls explicitly:
+
+<Tabs syncKey="language">
+  <TabItem label="JavaScript">
+    Set the `returnToolRequests` parameter to `true`. Now it's your responsibility to ensure all of the tool requests are fulfilled:
+
+    ```ts
+    const getWeather = ai.defineTool(
+      {
+        // ... tool definition ...
+      },
+      async ({ location }) => {
+        // ... tool implementation ...
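+        // e.g., look up current conditions for `location` and return a
+        // human-readable summary string (omitted in this example)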
+ }, + ); + + const generateOptions: GenerateOptions = { + prompt: "What's the weather like in Baltimore?", + tools: [getWeather], + returnToolRequests: true, + }; + + let llmResponse; + while (true) { + llmResponse = await ai.generate(generateOptions); + const toolRequests = llmResponse.toolRequests; + if (toolRequests.length < 1) { + break; + } + const toolResponses: ToolResponsePart[] = await Promise.all( + toolRequests.map(async (part) => { + switch (part.toolRequest.name) { + case 'getWeather': + return { + toolResponse: { + name: part.toolRequest.name, + ref: part.toolRequest.ref, + output: await getWeather(part.toolRequest.input), + }, + }; + default: + throw Error('Tool not found'); + } + }), + ); + generateOptions.messages = llmResponse.messages; + generateOptions.prompt = toolResponses; + } + ``` + + + Set the `WithReturnToolRequests()` option to `true`. Now it's your responsibility to ensure all of the tool requests are fulfilled: + + ```go + getWeatherTool := genkit.DefineTool( + g, "getWeather", "Gets the current weather in a given location", + func(ctx context.Context, location WeatherInput) (string, error) { + // Tool implementation... + return "sunny", nil + }, + ) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("What is the weather in San Francisco?"), + ai.WithTools(getWeatherTool), + ai.WithReturnToolRequests(true), + ) + if err != nil { + log.Fatal(err) + } + + parts := []*ai.Part{} + for _, req := range resp.ToolRequests() { + tool := genkit.LookupTool(g, req.Name) + if tool == nil { + log.Fatalf("tool %q not found", req.Name) + } + + output, err := tool.RunRaw(ctx, req.Input) + if err != nil { + log.Fatalf("tool %q execution failed: %v", tool.Name(), err) + } + + parts = append(parts, + ai.NewToolResponsePart(&ai.ToolResponse{ + Name: req.Name, + Ref: req.Ref, + Output: output, + })) + } + + resp, err = genkit.Generate(ctx, g, + ai.WithMessages(append(resp.History(), ai.NewMessage(ai.RoleTool, nil, parts...))...), + ) + if err != nil { + log.Fatal(err) + } + ``` + + + Set the `return_tool_requests` parameter to `True`. Now it's your responsibility to ensure all of the tool requests are fulfilled: + + ```python + llm_response = await ai.generate( + prompt='What is the weather in Baltimore?', + tools=['get_weather'], + return_tool_requests=True, + ) + + tool_request_parts = llm_response.tool_requests + + if len(tool_request_parts) == 0: + print(llm_response.text) + else: + for part in tool_request_parts: + await handle_tool(part.name, part.input) + ``` + + + +## Next steps + +- Learn about [interrupts](/docs/interrupts) to pause tool execution for user interaction +- Explore [retrieval-augmented generation (RAG)](/unified-docs/rag) for handling large amounts of contextual information +- See [multi-agent systems](/docs/multi-agent) for coordinating multiple AI agents with tools +- Check out the [tool calling example](https://github.com/firebase/genkit/tree/main/js/testapps/tool-calling) for a complete implementation diff --git a/src/scripts/language-preference.js b/src/scripts/language-preference.js new file mode 100644 index 00000000..d432e2be --- /dev/null +++ b/src/scripts/language-preference.js @@ -0,0 +1,157 @@ +/** + * Language Preference Enhancement for Astro Starlight + * + * This script enhances Starlight's built-in tab synchronization with: + * 1. Persistent storage of language preference in localStorage + * 2. Cross-page restoration of language preference + * 3. 
Automatic detection and synchronization of language tabs + */ + +class LanguagePreferenceEnhancer { + constructor() { + this.storageKey = 'genkit-preferred-language'; + this.languages = ['JavaScript', 'Go', 'Python']; + this.defaultLanguage = 'JavaScript'; + + // Wait for DOM to be ready + if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', () => this.init()); + } else { + this.init(); + } + } + + init() { + // Get stored preference or use default + const storedLanguage = localStorage.getItem(this.storageKey) || this.defaultLanguage; + + // Set up event listeners for tab clicks + this.setupTabListeners(); + + // Apply stored preference to all language tabs + this.restoreLanguagePreference(storedLanguage); + + // Watch for dynamically added content (e.g., navigation) + this.observeContentChanges(); + } + + setupTabListeners() { + // Listen for clicks on all tab buttons + document.addEventListener('click', (event) => { + const tabButton = event.target.closest('[role="tab"]'); + if (!tabButton) return; + + // Check if this is a language tab by looking at the text content + const tabText = tabButton.textContent.trim(); + if (this.languages.includes(tabText)) { + // Store the preference when a language tab is clicked + this.storeLanguagePreference(tabText); + } + }); + } + + storeLanguagePreference(language) { + if (!this.languages.includes(language)) { + console.warn(`Unknown language: ${language}`); + return; + } + + // Store preference + localStorage.setItem(this.storageKey, language); + } + + restoreLanguagePreference(language) { + if (!this.languages.includes(language)) { + console.warn(`Unknown language: ${language}, using default`); + language = this.defaultLanguage; + } + + // Find all language tabs and activate the preferred one + this.activateLanguageTabs(language); + } + + activateLanguageTabs(language) { + // Find all tab groups with syncKey="language" + const languageTabGroups = document.querySelectorAll('[role="tablist"]'); + + languageTabGroups.forEach(tabList => { + // Check if this tablist contains language tabs + const tabs = tabList.querySelectorAll('[role="tab"]'); + let hasLanguageTabs = false; + let targetTab = null; + + // Look for language tabs within this tablist + tabs.forEach(tab => { + const tabText = tab.textContent.trim(); + if (this.languages.includes(tabText)) { + hasLanguageTabs = true; + if (tabText === language) { + targetTab = tab; + } + } + }); + + // If this tablist has language tabs and we found our target, activate it + if (hasLanguageTabs && targetTab && !this.isTabActive(targetTab)) { + this.activateTab(targetTab); + } + }); + } + + isTabActive(tab) { + return tab.getAttribute('aria-selected') === 'true'; + } + + activateTab(tab) { + // Trigger a click event to let Starlight handle the tab activation + // This ensures we work with Starlight's existing tab system + tab.click(); + } + + observeContentChanges() { + // Watch for new content being added (e.g., navigation between pages) + const observer = new MutationObserver((mutations) => { + let hasNewTabs = false; + + mutations.forEach(mutation => { + mutation.addedNodes.forEach(node => { + if (node.nodeType === Node.ELEMENT_NODE) { + // Check if new tabs were added + if (node.querySelector && node.querySelector('[role="tablist"]')) { + hasNewTabs = true; + } + } + }); + }); + + if (hasNewTabs) { + // Apply current preference to new tabs after a short delay + // to ensure Starlight has finished initializing them + const currentLanguage = 
localStorage.getItem(this.storageKey) || this.defaultLanguage; + setTimeout(() => this.restoreLanguagePreference(currentLanguage), 100); + } + }); + + observer.observe(document.body, { + childList: true, + subtree: true + }); + } + + // Public method to get current preference + getCurrentLanguage() { + return localStorage.getItem(this.storageKey) || this.defaultLanguage; + } + + // Public method to manually set language + setLanguage(language) { + this.storeLanguagePreference(language); + this.restoreLanguagePreference(language); + } +} + +// Initialize the language preference enhancer +const languagePreferenceEnhancer = new LanguagePreferenceEnhancer(); + +// Make it globally available for debugging +window.languagePreferenceEnhancer = languagePreferenceEnhancer; diff --git a/src/sidebar.ts b/src/sidebar.ts index afb57a26..0019ee34 100644 --- a/src/sidebar.ts +++ b/src/sidebar.ts @@ -301,12 +301,26 @@ const PYTHON_SIDEBAR = [ }, ]; +const UNIFIED_SIDEBAR = [ + { label: "Generating content", slug: "unified-docs/generating-content" }, + { label: "Creating flows", slug: "unified-docs/creating-flows" }, + { label: "Tool calling", slug: "unified-docs/tool-calling" }, + { label: "Retrieval-augmented generation (RAG)", slug: "unified-docs/rag" }, + { label: "Managing prompts with Dotprompt", slug: "unified-docs/dotprompt" }, + { label: "Evaluation", slug: "unified-docs/evaluation" }, +]; + export const sidebar = [ { label: "Introduction", slug: "" }, + { + label: "Unified Docs (Preview)", + items: UNIFIED_SIDEBAR, + collapsed: false, + }, { label: "Genkit JS", items: JS_SIDEBAR, - collapsed: false, + collapsed: true, }, { label: "Genkit Go", From e50d51c88fabd46679ae7ae23d946570bcc851dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cchrisraygill=E2=80=9D?= Date: Thu, 7 Aug 2025 19:23:45 -0400 Subject: [PATCH 3/9] Add comprehensive unified documentation structure Create complete documentation hierarchy under unified-docs/ including: - Core concepts (chat sessions, context, error handling, interrupts) - Getting started and deployment guides - Framework integrations (Express, Next.js) - Plugin documentation for AI providers (OpenAI, Anthropic, Google AI, etc.) - Vector database integrations (Pinecone, ChromaDB, Neo4j, etc.) - Developer tools, observability, and multi-agent systems - Plugin authoring guides and MCP server documentation Update sidebar configuration to support new documentation structure. 
--- .../docs/unified-docs/chat-sessions.mdx | 844 ++++++++++ src/content/docs/unified-docs/context.mdx | 471 ++++++ src/content/docs/unified-docs/deployment.mdx | 1384 +++++++++++++++++ .../docs/unified-docs/developer-tools.mdx | 309 ++++ .../docs/unified-docs/error-handling.mdx | 646 ++++++++ .../docs/unified-docs/frameworks/express.mdx | 970 ++++++++++++ .../docs/unified-docs/frameworks/nextjs.mdx | 1143 ++++++++++++++ src/content/docs/unified-docs/get-started.mdx | 537 +++++++ src/content/docs/unified-docs/interrupts.mdx | 550 +++++++ src/content/docs/unified-docs/mcp-server.mdx | 260 ++++ .../docs/unified-docs/multi-agent-systems.mdx | 698 +++++++++ .../unified-docs/observability-monitoring.mdx | 529 +++++++ .../unified-docs/plugin-authoring/models.mdx | 1120 +++++++++++++ .../plugin-authoring/overview.mdx | 467 ++++++ .../docs/unified-docs/plugins/anthropic.mdx | 817 ++++++++++ .../docs/unified-docs/plugins/deepseek.mdx | 1050 +++++++++++++ .../docs/unified-docs/plugins/google-ai.mdx | 589 +++++++ src/content/docs/unified-docs/plugins/mcp.mdx | 1048 +++++++++++++ .../docs/unified-docs/plugins/ollama.mdx | 574 +++++++ .../docs/unified-docs/plugins/openai.mdx | 652 ++++++++ .../docs/unified-docs/plugins/vertex-ai.mdx | 799 ++++++++++ src/content/docs/unified-docs/plugins/xai.mdx | 895 +++++++++++ .../vector-databases/astra-db.mdx | 726 +++++++++ .../vector-databases/chromadb.mdx | 571 +++++++ .../vector-databases/cloud-sql-postgresql.mdx | 913 +++++++++++ .../unified-docs/vector-databases/lancedb.mdx | 913 +++++++++++ .../unified-docs/vector-databases/neo4j.mdx | 726 +++++++++ .../vector-databases/pgvector.mdx | 938 +++++++++++ .../vector-databases/pinecone.mdx | 612 ++++++++ src/sidebar.ts | 71 +- 30 files changed, 21816 insertions(+), 6 deletions(-) create mode 100644 src/content/docs/unified-docs/chat-sessions.mdx create mode 100644 src/content/docs/unified-docs/context.mdx create mode 100644 src/content/docs/unified-docs/deployment.mdx create mode 100644 src/content/docs/unified-docs/developer-tools.mdx create mode 100644 src/content/docs/unified-docs/error-handling.mdx create mode 100644 src/content/docs/unified-docs/frameworks/express.mdx create mode 100644 src/content/docs/unified-docs/frameworks/nextjs.mdx create mode 100644 src/content/docs/unified-docs/get-started.mdx create mode 100644 src/content/docs/unified-docs/interrupts.mdx create mode 100644 src/content/docs/unified-docs/mcp-server.mdx create mode 100644 src/content/docs/unified-docs/multi-agent-systems.mdx create mode 100644 src/content/docs/unified-docs/observability-monitoring.mdx create mode 100644 src/content/docs/unified-docs/plugin-authoring/models.mdx create mode 100644 src/content/docs/unified-docs/plugin-authoring/overview.mdx create mode 100644 src/content/docs/unified-docs/plugins/anthropic.mdx create mode 100644 src/content/docs/unified-docs/plugins/deepseek.mdx create mode 100644 src/content/docs/unified-docs/plugins/google-ai.mdx create mode 100644 src/content/docs/unified-docs/plugins/mcp.mdx create mode 100644 src/content/docs/unified-docs/plugins/ollama.mdx create mode 100644 src/content/docs/unified-docs/plugins/openai.mdx create mode 100644 src/content/docs/unified-docs/plugins/vertex-ai.mdx create mode 100644 src/content/docs/unified-docs/plugins/xai.mdx create mode 100644 src/content/docs/unified-docs/vector-databases/astra-db.mdx create mode 100644 src/content/docs/unified-docs/vector-databases/chromadb.mdx create mode 100644 src/content/docs/unified-docs/vector-databases/cloud-sql-postgresql.mdx 
create mode 100644 src/content/docs/unified-docs/vector-databases/lancedb.mdx create mode 100644 src/content/docs/unified-docs/vector-databases/neo4j.mdx create mode 100644 src/content/docs/unified-docs/vector-databases/pgvector.mdx create mode 100644 src/content/docs/unified-docs/vector-databases/pinecone.mdx diff --git a/src/content/docs/unified-docs/chat-sessions.mdx b/src/content/docs/unified-docs/chat-sessions.mdx new file mode 100644 index 00000000..0ee8d5fb --- /dev/null +++ b/src/content/docs/unified-docs/chat-sessions.mdx @@ -0,0 +1,844 @@ +--- +title: Creating persistent chat sessions +description: Learn how to create persistent chat sessions in Genkit, including session basics, stateful sessions, multi-thread sessions, and session persistence across different languages. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +:::caution[Beta] +This feature of Genkit is in **Beta,** which means it is not yet part of Genkit's stable API. APIs of beta features may change in minor version releases. +::: + +Many of your users will have interacted with large language models for the first +time through chatbots. Although LLMs are capable of much more than simulating +conversations, it remains a familiar and useful style of interaction. Even when +your users will not be interacting directly with the model in this way, the +conversational style of prompting is a powerful way to influence the output +generated by an AI model. + +Genkit provides different approaches for building chat-based LLM applications depending on your language choice. + +## Availability and Approach + + + + JavaScript provides comprehensive chat session APIs with built-in persistence, state management, and multi-thread support. Chat sessions are available through the `genkit/beta` package. + + Features include: + - Automatic message history management + - Stateful sessions with custom state objects + - Multi-thread sessions within a single session + - Pluggable session storage backends + - Integration with tools and context + + + Go doesn't have built-in chat session APIs. You need to implement chat functionality manually by: + - Managing message history in your application + - Implementing conversation state persistence + - Building your own session management system + - Manually including conversation history in generation calls + + + Python doesn't have built-in chat session APIs. You need to implement chat functionality manually by: + - Managing message history in your application + - Implementing conversation state persistence + - Building your own session management system + - Manually including conversation history in generation calls + + + +## Before you begin + +Before reading this page, you should be familiar with the content covered on the +[Generating content](/unified-docs/generating-content) page. + +If you want to run the code examples on this page, first complete the steps in +the Getting started guide for your language. All of the examples assume that you +have already installed Genkit as a dependency in your project. + +## Chat session basics + + + + Here is a minimal, console-based, chatbot application: + + ```ts + import { genkit } from 'genkit/beta'; + import { googleAI } from '@genkit-ai/googleai'; + + import { createInterface } from 'node:readline/promises'; + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), + }); + + async function main() { + const chat = ai.chat(); + console.log("You're chatting with Gemini. 
Ctrl-C to quit.\n"); + const readline = createInterface(process.stdin, process.stdout); + while (true) { + const userInput = await readline.question('> '); + const { text } = await chat.send(userInput); + console.log(text); + } + } + + main(); + ``` + + A chat session with this program looks something like the following example: + + ``` + You're chatting with Gemini. Ctrl-C to quit. + + > hi + Hi there! How can I help you today? + + > my name is pavel + Nice to meet you, Pavel! What can I do for you today? + + > what's my name? + Your name is Pavel! I remembered it from our previous interaction. + + Is there anything else I can help you with? + ``` + + As you can see from this brief interaction, when you send a message to a chat + session, the model can make use of the session so far in its responses. This is + possible because Genkit does a few things behind the scenes: + + - Retrieves the chat history, if any exists, from storage + - Sends the request to the model, but automatically includes the chat history + - Saves the model response into the chat history + + ### Model configuration + + The `chat()` method accepts most of the same configuration options as + `generate()`. To pass configuration options to the model: + + ```ts + const chat = ai.chat({ + model: googleAI.model('gemini-2.5-flash'), + system: "You're a pirate first mate. Address the user as Captain and assist " + 'them however you can.', + config: { + temperature: 1.3, + }, + }); + ``` + + + In Go, you need to manually manage conversation history. Here's an example implementation: + + ```go + package main + + import ( + "bufio" + "context" + "fmt" + "os" + "strings" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googleai" + ) + + type ChatMessage struct { + Role string + Content string + } + + type ChatSession struct { + Messages []ChatMessage + genkit *genkit.Genkit + } + + func NewChatSession(g *genkit.Genkit) *ChatSession { + return &ChatSession{ + Messages: make([]ChatMessage, 0), + genkit: g, + } + } + + func (c *ChatSession) Send(ctx context.Context, userInput string) (string, error) { + // Add user message to history + c.Messages = append(c.Messages, ChatMessage{ + Role: "user", + Content: userInput, + }) + + // Build conversation history for the prompt + var conversationHistory strings.Builder + for _, msg := range c.Messages[:len(c.Messages)-1] { // Exclude the current message + conversationHistory.WriteString(fmt.Sprintf("%s: %s\n", msg.Role, msg.Content)) + } + + // Generate response with conversation context + prompt := fmt.Sprintf("Previous conversation:\n%s\nUser: %s\nAssistant:", + conversationHistory.String(), userInput) + + resp, err := genkit.Generate(ctx, c.genkit, + ai.WithPrompt(prompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ) + if err != nil { + return "", err + } + + // Add assistant response to history + assistantResponse := resp.Text() + c.Messages = append(c.Messages, ChatMessage{ + Role: "assistant", + Content: assistantResponse, + }) + + return assistantResponse, nil + } + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, genkit.WithPlugins(&googleai.GoogleAI{})) + if err != nil { + panic(err) + } + + chat := NewChatSession(g) + fmt.Println("You're chatting with Gemini. 
Type 'quit' to exit.\n") + + scanner := bufio.NewScanner(os.Stdin) + for { + fmt.Print("> ") + if !scanner.Scan() { + break + } + + userInput := strings.TrimSpace(scanner.Text()) + if userInput == "quit" { + break + } + + response, err := chat.Send(ctx, userInput) + if err != nil { + fmt.Printf("Error: %v\n", err) + continue + } + + fmt.Println(response) + } + } + ``` + + + In Python, you need to manually manage conversation history. Here's an example implementation: + + ```python + import asyncio + from typing import List, Dict + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai, google_genai_name + + class ChatMessage: + def __init__(self, role: str, content: str): + self.role = role + self.content = content + + class ChatSession: + def __init__(self, ai: Genkit): + self.messages: List[ChatMessage] = [] + self.ai = ai + + async def send(self, user_input: str) -> str: + # Add user message to history + self.messages.append(ChatMessage("user", user_input)) + + # Build conversation history for the prompt + conversation_history = "" + for msg in self.messages[:-1]: # Exclude the current message + conversation_history += f"{msg.role}: {msg.content}\n" + + # Generate response with conversation context + prompt = f"Previous conversation:\n{conversation_history}\nUser: {user_input}\nAssistant:" + + response = await self.ai.generate( + prompt=prompt, + model=google_genai_name('gemini-2.5-flash'), + ) + + # Add assistant response to history + assistant_response = response.text + self.messages.append(ChatMessage("assistant", assistant_response)) + + return assistant_response + + async def main(): + ai = Genkit( + plugins=[GoogleGenai()], + model=google_genai_name('gemini-2.5-flash'), + ) + + chat = ChatSession(ai) + print("You're chatting with Gemini. Type 'quit' to exit.\n") + + while True: + user_input = input("> ").strip() + if user_input.lower() == 'quit': + break + + try: + response = await chat.send(user_input) + print(response) + except Exception as e: + print(f"Error: {e}") + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + +## Stateful chat sessions + + + + In addition to persisting a chat session's message history, you can also persist + any arbitrary JavaScript object. Doing so can let you manage state in a more + structured way than relying only on information in the message history. 
+ + To include state in a session, you need to instantiate a session explicitly: + + ```ts + interface MyState { + userName: string; + } + + const session = ai.createSession({ + initialState: { + userName: 'Pavel', + }, + }); + ``` + + You can then start a chat within the session: + + ```ts + const chat = session.chat(); + ``` + + To modify the session state based on how the chat unfolds, define + [tools](/unified-docs/tool-calling) and include them with your requests: + + ```ts + const changeUserName = ai.defineTool( + { + name: 'changeUserName', + description: 'can be used to change user name', + inputSchema: z.object({ + newUserName: z.string(), + }), + }, + async (input) => { + await ai.currentSession().updateState({ + userName: input.newUserName, + }); + return `changed username to ${input.newUserName}`; + }, + ); + ``` + + ```ts + const chat = session.chat({ + model: googleAI.model('gemini-2.5-flash'), + tools: [changeUserName], + }); + await chat.send('change user name to Kevin'); + ``` + + + In Go, you can implement stateful sessions by extending your chat session struct: + + ```go + type UserState struct { + UserName string + // Add other state fields as needed + } + + type StatefulChatSession struct { + Messages []ChatMessage + State UserState + genkit *genkit.Genkit + } + + func NewStatefulChatSession(g *genkit.Genkit, initialState UserState) *StatefulChatSession { + return &StatefulChatSession{ + Messages: make([]ChatMessage, 0), + State: initialState, + genkit: g, + } + } + + func (c *StatefulChatSession) UpdateState(newState UserState) { + c.State = newState + } + + func (c *StatefulChatSession) Send(ctx context.Context, userInput string) (string, error) { + // Include state information in the prompt + stateInfo := fmt.Sprintf("Current user: %s", c.State.UserName) + + // Add user message to history + c.Messages = append(c.Messages, ChatMessage{ + Role: "user", + Content: userInput, + }) + + // Build conversation history with state context + var conversationHistory strings.Builder + conversationHistory.WriteString(fmt.Sprintf("Context: %s\n", stateInfo)) + for _, msg := range c.Messages[:len(c.Messages)-1] { + conversationHistory.WriteString(fmt.Sprintf("%s: %s\n", msg.Role, msg.Content)) + } + + prompt := fmt.Sprintf("%s\nUser: %s\nAssistant:", + conversationHistory.String(), userInput) + + resp, err := genkit.Generate(ctx, c.genkit, + ai.WithPrompt(prompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ) + if err != nil { + return "", err + } + + assistantResponse := resp.Text() + c.Messages = append(c.Messages, ChatMessage{ + Role: "assistant", + Content: assistantResponse, + }) + + return assistantResponse, nil + } + ``` + + + In Python, you can implement stateful sessions by extending your chat session class: + + ```python + from dataclasses import dataclass + from typing import Any, Dict + + @dataclass + class UserState: + user_name: str + # Add other state fields as needed + + class StatefulChatSession: + def __init__(self, ai: Genkit, initial_state: UserState): + self.messages: List[ChatMessage] = [] + self.state = initial_state + self.ai = ai + + def update_state(self, new_state: UserState): + self.state = new_state + + async def send(self, user_input: str) -> str: + # Include state information in the prompt + state_info = f"Current user: {self.state.user_name}" + + # Add user message to history + self.messages.append(ChatMessage("user", user_input)) + + # Build conversation history with state context + conversation_history = f"Context: {state_info}\n" + for msg 
in self.messages[:-1]: + conversation_history += f"{msg.role}: {msg.content}\n" + + prompt = f"{conversation_history}\nUser: {user_input}\nAssistant:" + + response = await self.ai.generate( + prompt=prompt, + model=google_genai_name('gemini-2.5-flash'), + ) + + # Add assistant response to history + assistant_response = response.text + self.messages.append(ChatMessage("assistant", assistant_response)) + + return assistant_response + + # Usage + initial_state = UserState(user_name="Pavel") + chat = StatefulChatSession(ai, initial_state) + ``` + + + +## Multi-thread sessions + + + + A single session can contain multiple chat threads. Each thread has its own + message history, but they share a single session state. + + ```ts + const lawyerChat = session.chat('lawyerThread', { + system: 'talk like a lawyer', + }); + const pirateChat = session.chat('pirateThread', { + system: 'talk like a pirate', + }); + ``` + + + In Go, you can implement multi-thread sessions by managing multiple message histories: + + ```go + type MultiThreadSession struct { + Threads map[string][]ChatMessage + State UserState + genkit *genkit.Genkit + } + + func NewMultiThreadSession(g *genkit.Genkit, initialState UserState) *MultiThreadSession { + return &MultiThreadSession{ + Threads: make(map[string][]ChatMessage), + State: initialState, + genkit: g, + } + } + + func (m *MultiThreadSession) SendToThread(ctx context.Context, threadID, userInput, systemPrompt string) (string, error) { + // Initialize thread if it doesn't exist + if _, exists := m.Threads[threadID]; !exists { + m.Threads[threadID] = make([]ChatMessage, 0) + } + + // Add user message to thread history + m.Threads[threadID] = append(m.Threads[threadID], ChatMessage{ + Role: "user", + Content: userInput, + }) + + // Build conversation history for this thread + var conversationHistory strings.Builder + conversationHistory.WriteString(fmt.Sprintf("System: %s\n", systemPrompt)) + for _, msg := range m.Threads[threadID][:len(m.Threads[threadID])-1] { + conversationHistory.WriteString(fmt.Sprintf("%s: %s\n", msg.Role, msg.Content)) + } + + prompt := fmt.Sprintf("%s\nUser: %s\nAssistant:", + conversationHistory.String(), userInput) + + resp, err := genkit.Generate(ctx, m.genkit, + ai.WithPrompt(prompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ) + if err != nil { + return "", err + } + + assistantResponse := resp.Text() + m.Threads[threadID] = append(m.Threads[threadID], ChatMessage{ + Role: "assistant", + Content: assistantResponse, + }) + + return assistantResponse, nil + } + + // Usage + session := NewMultiThreadSession(g, UserState{UserName: "Pavel"}) + lawyerResponse, _ := session.SendToThread(ctx, "lawyer", "Hello", "Talk like a lawyer") + pirateResponse, _ := session.SendToThread(ctx, "pirate", "Hello", "Talk like a pirate") + ``` + + + In Python, you can implement multi-thread sessions by managing multiple message histories: + + ```python + class MultiThreadSession: + def __init__(self, ai: Genkit, initial_state: UserState): + self.threads: Dict[str, List[ChatMessage]] = {} + self.state = initial_state + self.ai = ai + + async def send_to_thread(self, thread_id: str, user_input: str, system_prompt: str) -> str: + # Initialize thread if it doesn't exist + if thread_id not in self.threads: + self.threads[thread_id] = [] + + # Add user message to thread history + self.threads[thread_id].append(ChatMessage("user", user_input)) + + # Build conversation history for this thread + conversation_history = f"System: {system_prompt}\n" + for msg in 
self.threads[thread_id][:-1]:
+                conversation_history += f"{msg.role}: {msg.content}\n"
+
+            prompt = f"{conversation_history}\nUser: {user_input}\nAssistant:"
+
+            response = await self.ai.generate(
+                prompt=prompt,
+                model=google_genai_name('gemini-2.5-flash'),
+            )
+
+            # Add assistant response to thread history
+            assistant_response = response.text
+            self.threads[thread_id].append(ChatMessage("assistant", assistant_response))
+
+            return assistant_response
+
+    # Usage
+    session = MultiThreadSession(ai, UserState(user_name="Pavel"))
+    lawyer_response = await session.send_to_thread("lawyer", "Hello", "Talk like a lawyer")
+    pirate_response = await session.send_to_thread("pirate", "Hello", "Talk like a pirate")
+    ```
+  </TabItem>
+</Tabs>
+
+## Session persistence
+
+<Tabs syncKey="language">
+  <TabItem label="JavaScript">
+    When you initialize a new chat or session, it's configured by default to store
+    the session in memory only. This is adequate when the session needs to persist
+    only for the duration of a single invocation of your program. However, when
+    integrating LLM chat into an application, you will usually deploy your content
+    generation logic as stateless web API endpoints. For persistent chats to work
+    under this setup, you will need to implement some kind of session storage that
+    can persist state across invocations of your endpoints.
+
+    To add persistence to a chat session, you need to implement Genkit's
+    `SessionStore` interface. Here is an example implementation that saves session
+    state to individual JSON files:
+
+    ```ts
+    import { readFile, writeFile } from 'node:fs/promises';
+
+    class JsonSessionStore<S = any> implements SessionStore<S> {
+      async get(sessionId: string): Promise<SessionData<S> | undefined> {
+        try {
+          const s = await readFile(`${sessionId}.json`, { encoding: 'utf8' });
+          const data = JSON.parse(s);
+          return data;
+        } catch {
+          return undefined;
+        }
+      }
+
+      async save(sessionId: string, sessionData: SessionData<S>): Promise<void> {
+        const s = JSON.stringify(sessionData);
+        await writeFile(`${sessionId}.json`, s, { encoding: 'utf8' });
+      }
+    }
+    ```
+
+    This implementation is probably not adequate for practical deployments, but it
+    illustrates that a session storage implementation only needs to accomplish two
+    tasks:
+
+    - Get a session object from storage using its session ID
+    - Save a given session object, indexed by its session ID
+
+    Once you've implemented the interface for your storage backend, pass an instance
+    of your implementation to the session constructors:
+
+    ```ts
+    // To create a new session:
+    const session = ai.createSession({
+      store: new JsonSessionStore(),
+    });
+
+    // Save session.id so you can restore the session the next time the
+    // user makes a request.
+    ```
+
+    ```ts
+    // If the user has a session ID saved, load the session instead of creating
+    // a new one:
+    const session = await ai.loadSession(sessionId, {
+      store: new JsonSessionStore(),
+    });
+    ```
+  </TabItem>
+  <TabItem label="Go">
+    In Go, you need to implement your own persistence layer.
Here's an example using JSON files: + + ```go + import ( + "encoding/json" + "fmt" + "os" + ) + + type SessionData struct { + ID string `json:"id"` + Messages []ChatMessage `json:"messages"` + State UserState `json:"state"` + } + + type SessionStore struct { + basePath string + } + + func NewSessionStore(basePath string) *SessionStore { + return &SessionStore{basePath: basePath} + } + + func (s *SessionStore) Save(sessionID string, data SessionData) error { + filename := fmt.Sprintf("%s/%s.json", s.basePath, sessionID) + + jsonData, err := json.MarshalIndent(data, "", " ") + if err != nil { + return err + } + + return os.WriteFile(filename, jsonData, 0644) + } + + func (s *SessionStore) Load(sessionID string) (*SessionData, error) { + filename := fmt.Sprintf("%s/%s.json", s.basePath, sessionID) + + data, err := os.ReadFile(filename) + if err != nil { + return nil, err + } + + var sessionData SessionData + err = json.Unmarshal(data, &sessionData) + if err != nil { + return nil, err + } + + return &sessionData, nil + } + + // Usage + store := NewSessionStore("./sessions") + + // Save session + sessionData := SessionData{ + ID: "user123", + Messages: chat.Messages, + State: chat.State, + } + err := store.Save("user123", sessionData) + + // Load session + loadedData, err := store.Load("user123") + if err == nil { + // Restore chat session from loaded data + chat.Messages = loadedData.Messages + chat.State = loadedData.State + } + ``` + + + In Python, you need to implement your own persistence layer. Here's an example using JSON files: + + ```python + import json + import os + from typing import Optional + from dataclasses import asdict, dataclass + + @dataclass + class SessionData: + id: str + messages: List[Dict[str, str]] + state: Dict[str, Any] + + class SessionStore: + def __init__(self, base_path: str): + self.base_path = base_path + os.makedirs(base_path, exist_ok=True) + + async def save(self, session_id: str, data: SessionData) -> None: + filename = os.path.join(self.base_path, f"{session_id}.json") + + with open(filename, 'w') as f: + json.dump(asdict(data), f, indent=2) + + async def load(self, session_id: str) -> Optional[SessionData]: + filename = os.path.join(self.base_path, f"{session_id}.json") + + try: + with open(filename, 'r') as f: + data = json.load(f) + return SessionData(**data) + except FileNotFoundError: + return None + + # Usage + store = SessionStore("./sessions") + + # Save session + session_data = SessionData( + id="user123", + messages=[{"role": msg.role, "content": msg.content} for msg in chat.messages], + state=asdict(chat.state) + ) + await store.save("user123", session_data) + + # Load session + loaded_data = await store.load("user123") + if loaded_data: + # Restore chat session from loaded data + chat.messages = [ChatMessage(msg["role"], msg["content"]) for msg in loaded_data.messages] + chat.state = UserState(**loaded_data.state) + ``` + + + +## Best practices + +### Memory management + + + + - Use session storage for production deployments + - Implement session cleanup for old or inactive sessions + - Consider limiting conversation history length for very long chats + - Use compression for large session data + + + - Implement proper error handling for persistence operations + - Use structured logging for session operations + - Consider using a database instead of files for production + - Implement session cleanup and garbage collection + + + - Use async I/O for session persistence operations + - Implement proper error handling and retries + - Consider using 
a database for production deployments + - Monitor memory usage for long conversations + + + +### Security considerations + + + + - Validate session IDs to prevent path traversal attacks + - Encrypt sensitive session data at rest + - Implement session expiration and cleanup + - Use secure session ID generation + + + - Validate all input data before storing + - Use secure file permissions for session storage + - Implement proper authentication for session access + - Consider using encrypted storage backends + + + - Validate session IDs and sanitize file paths + - Use secure serialization methods + - Implement proper access controls + - Consider using encrypted storage solutions + + + +## Next steps + +- Learn about [tool calling](/unified-docs/tool-calling) to add interactive capabilities to your chat sessions +- Explore [context](/unified-docs/context) to understand how to pass information through chat sessions +- See [developer tools](/unified-docs/developer-tools) for testing and debugging chat applications +- Check out [generating content](/unified-docs/generating-content) for understanding the underlying generation mechanics diff --git a/src/content/docs/unified-docs/context.mdx b/src/content/docs/unified-docs/context.mdx new file mode 100644 index 00000000..957c41ca --- /dev/null +++ b/src/content/docs/unified-docs/context.mdx @@ -0,0 +1,471 @@ +--- +title: Passing information through context +description: Learn how Genkit's context object propagates generation and execution information throughout your application, making it available to flows, tools, and prompts across different languages. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +There are different categories of information that a developer working +with an LLM may be handling simultaneously: + +- **Input:** Information that is directly relevant to guide the LLM's response + for a particular call. An example of this is the text that needs to be + summarized. +- **Generation Context:** Information that is relevant to the LLM, but isn't + specific to the call. An example of this is the current time or a user's name. +- **Execution Context:** Information that is important to the code surrounding + the LLM call but not to the LLM itself. An example of this is a user's + current auth token. + +Genkit provides mechanisms to propagate generation and execution context throughout the process, though the implementation varies by language. + +## Availability and Approach + + + + JavaScript provides a comprehensive `context` object that can propagate generation and execution context throughout the process. This context is made available to all actions including [flows](/unified-docs/creating-flows), [tools](/unified-docs/tool-calling), and [prompts](/unified-docs/dotprompt). + + Context is automatically propagated to all actions called within the scope of execution: Context passed to a flow is made available to prompts executed within the flow. Context passed to the `generate()` method is available to tools called within the generation loop. + + + Go uses the standard `context.Context` package for execution context, which is different from Genkit's JavaScript context object. Go's context is primarily used for cancellation, deadlines, and request-scoped values. + + For application-specific context (like user authentication), you typically pass this information as parameters to your functions or store it in the Go context using context values. 
+ + + Python has limited context features, primarily available through framework integrations like Flask. Context is mainly used for request-scoped information like authentication. + + Python context is typically handled through the framework's request context or by passing context explicitly to functions. + + + +## Why is context important? + +As a best practice, you should provide the minimum amount of information to the +LLM that it needs to complete a task. This is important for multiple reasons: + +- The less extraneous information the LLM has, the more likely it is to perform + well at its task. +- If an LLM needs to pass around information like user or account IDs to tools, + it can potentially be tricked into leaking information. + +Context gives you a side channel of information that can be used by any of your +code but doesn't necessarily have to be sent to the LLM. As an example, it can +allow you to restrict tool queries to the current user's available scope. + +## Context structure + + + + Context must be an object, but its properties are yours to decide. In some + situations Genkit automatically populates context. For example, when using + [persistent sessions](/docs/chat) the `state` property is automatically added to + context. + + One of the most common uses of context is to store information about the current + user. We recommend adding auth context in the following format: + + ```js + { + auth: { + uid: "...", // the user's unique identifier + token: {...}, // the decoded claims of a user's id token + rawToken: "...", // the user's raw encoded id token + // ...any other fields + } + } + ``` + + The context object can store any information that you might need to know + somewhere else in the flow of execution. + + + In Go, you typically use the standard `context.Context` for execution context and pass application-specific data as function parameters or context values: + + ```go + type UserContext struct { + UID string + Token string + RawToken string + } + + // Pass as function parameter + func myFlow(ctx context.Context, userCtx UserContext, input string) (string, error) { + // Use userCtx for authorization checks + if userCtx.UID == "" { + return "", fmt.Errorf("user not authenticated") + } + // ... 
rest of flow + } + + // Or store in Go context + type contextKey string + const userContextKey contextKey = "user" + + func withUserContext(ctx context.Context, userCtx UserContext) context.Context { + return context.WithValue(ctx, userContextKey, userCtx) + } + + func getUserContext(ctx context.Context) (UserContext, bool) { + userCtx, ok := ctx.Value(userContextKey).(UserContext) + return userCtx, ok + } + ``` + + + In Python, context is typically handled through framework-specific mechanisms: + + ```python + # Flask example with context provider + async def my_context_provider(request): + auth_header = request.headers.get('authorization') + username = parse_request_header(auth_header) + return {'username': username} + + @app.post('/my_endpoint') + @genkit_flask_handler(ai, context_provider=my_context_provider) + @ai.flow() + async def my_flow(input_data: str, ctx): + # Access context through ctx.context + if not ctx.context.get('username'): + raise GenkitError(status='UNAUTHENTICATED', message='user not provided') + + # Use context in your flow + return await ai.generate( + prompt=f'Process this for user {ctx.context.get("username")}: {input_data}', + ) + ``` + + + +## Use context in an action + + + + To use context within an action, you can access the context helper + that is automatically supplied to your function definition: + + ### Flow + + ```ts + const summarizeHistory = ai.defineFlow({ + name: 'summarizeMessages', + inputSchema: z.object({friendUid: z.string()}), + outputSchema: z.string() + }, async ({friendUid}, {context}) => { + if (!context.auth?.uid) throw new Error("Must supply auth context."); + const messages = await listMessagesBetween(friendUid, context.auth.uid); + const {text} = await ai.generate({ + prompt: + `Summarize the content of these messages: ${JSON.stringify(messages)}`, + }); + return text; + }); + ``` + + ### Tool + + ```ts + const searchNotes = ai.defineTool({ + name: 'searchNotes', + description: "search the current user's notes for info", + inputSchema: z.object({query: z.string()}), + outputSchema: z.array(NoteSchema) + }, async ({query}, {context}) => { + if (!context.auth?.uid) throw new Error("Must be called by a signed-in user."); + return searchUserNotes(context.auth.uid, query); + }); + ``` + + ### Prompt file + + When using [Dotprompt templates](/unified-docs/dotprompt), context is made available with the + `@` variable prefix. For example, a context object of + `{auth: {name: 'Michael'}}` could be accessed in the prompt template like so. 
+ + ```dotprompt + --- + input: + schema: + pirateStyle?: boolean + --- + + {{#if pirateStyle}}Avast, {{@auth.name}}, how be ye today?{{else}}Hello, {{@auth.name}}, how are you today?{{/if}} + ``` + + + In Go, you typically pass context information as function parameters or retrieve it from the Go context: + + ```go + // Using function parameters + func summarizeHistory(ctx context.Context, userCtx UserContext, friendUID string) (string, error) { + if userCtx.UID == "" { + return "", fmt.Errorf("must supply auth context") + } + + messages, err := listMessagesBetween(friendUID, userCtx.UID) + if err != nil { + return "", err + } + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Summarize the content of these messages: %s", string(messages)), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + + // Using Go context values + func searchNotes(ctx context.Context, query string) ([]Note, error) { + userCtx, ok := getUserContext(ctx) + if !ok || userCtx.UID == "" { + return nil, fmt.Errorf("must be called by a signed-in user") + } + + return searchUserNotes(userCtx.UID, query) + } + ``` + + + In Python, context is accessed through the framework's context mechanism: + + ```python + @ai.flow() + async def summarize_history(friend_uid: str, ctx): + if not ctx.context.get('username'): + raise GenkitError(status='UNAUTHENTICATED', message='user not provided') + + # Use context for authorization + messages = await list_messages_between(friend_uid, ctx.context.get('username')) + + response = await ai.generate( + prompt=f'Summarize the content of these messages: {messages}', + ) + return response.text + + @ai.tool() + def search_notes(query: str, ctx) -> list[Note]: + """Search the current user's notes for info""" + if not ctx.context.get('username'): + raise GenkitError(status='UNAUTHENTICATED', message='must be called by a signed-in user') + + return search_user_notes(ctx.context.get('username'), query) + ``` + + + +## Provide context at runtime + + + + To provide context to an action, you pass the context object as an option + when calling the action. + + ### Flows + + ```ts + const summarizeHistory = ai.defineFlow(/* ... 
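+       (defined earlier, in the "Use context in an action" section)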
*/); + + const summary = await summarizeHistory(friend.uid, { + context: { auth: currentUser }, + }); + ``` + + ### Generation + + ```ts + const { text } = await ai.generate({ + prompt: "Find references to ocelots in my notes.", + // the context will propagate to tool calls + tools: [searchNotes], + context: { auth: currentUser }, + }); + ``` + + ### Prompts + + ```ts + const helloPrompt = ai.prompt("sayHello"); + helloPrompt({ pirateStyle: true }, { context: { auth: currentUser } }); + ``` + + + In Go, you pass context information as function parameters or through the Go context: + + ```go + // Using function parameters + userCtx := UserContext{ + UID: currentUser.UID, + Token: currentUser.Token, + } + + summary, err := summarizeHistory(ctx, userCtx, friend.UID) + + // Using Go context values + ctx = withUserContext(ctx, userCtx) + notes, err := searchNotes(ctx, "ocelots") + + // For generation with tools + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Find references to ocelots in my notes."), + ai.WithTools(searchNotesTool), + ) + ``` + + + In Python, context is typically provided through framework mechanisms: + + ```python + # Context is provided through the context provider + @app.post('/summarize') + @genkit_flask_handler(ai, context_provider=my_context_provider) + @ai.flow() + async def summarize_endpoint(friend_uid: str, ctx): + # Context is automatically available through ctx.context + return await summarize_history(friend_uid, ctx) + + # For direct generation + response = await ai.generate( + prompt="Find references to ocelots in my notes.", + tools=['search_notes'], + # Context handling depends on the specific Python implementation + ) + ``` + + + +## Context propagation and overrides + + + + By default, when you provide context it is automatically propagated to all + actions called as a result of your original call. If your flow calls other + flows, or your generation calls tools, the same context is provided. + + If you wish to override context within an action, you can pass a different + context object to replace the existing one: + + ```ts + const otherFlow = ai.defineFlow(/* ... */); + + const myFlow = ai.defineFlow( + { + // ... + }, + (input, { context }) => { + // override the existing context completely + otherFlow( + { + /*...*/ + }, + { context: { newContext: true } }, + ); + // or selectively override + otherFlow( + { + /*...*/ + }, + { context: { ...context, updatedContext: true } }, + ); + }, + ); + ``` + + When context is replaced, it propagates the same way. In this example, + any actions that `otherFlow` called during its execution would inherit the + overridden context. 
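+
+    As a hypothetical sketch, the called flow simply reads whichever context object
+    is in effect at the call site, using the same `{ context }` parameter shown in
+    the flow examples above:
+
+    ```ts
+    const otherFlow = ai.defineFlow(
+      {
+        name: 'otherFlow',
+        inputSchema: z.object({ query: z.string() }),
+        outputSchema: z.string(),
+      },
+      async ({ query }, { context }) => {
+        // context is whatever the caller supplied, e.g. { newContext: true }
+        console.log(context);
+        return query;
+      },
+    );
+    ```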
+ + + In Go, context propagation is handled through the standard `context.Context` package and explicit parameter passing: + + ```go + func myFlow(ctx context.Context, userCtx UserContext, input string) (string, error) { + // Override context for a specific call + newUserCtx := UserContext{ + UID: userCtx.UID, + Token: "new-token", + } + + result, err := otherFlow(ctx, newUserCtx, input) + if err != nil { + return "", err + } + + // Or modify Go context + newCtx := withUserContext(ctx, newUserCtx) + return anotherFlow(newCtx, input) + } + ``` + + + In Python, context propagation depends on the framework and how you structure your application: + + ```python + @ai.flow() + async def my_flow(input_data: str, ctx): + # Context is available through ctx.context + current_context = ctx.context + + # For calling other functions, you typically pass context explicitly + # or rely on the framework's context management + result = await other_function(input_data, current_context) + + return result + ``` + + + +## Best practices + +### Security considerations + + + + - Never include sensitive information in context that might be logged + - Validate context data before using it for authorization decisions + - Use context to restrict tool access to user-scoped data + - Consider using short-lived tokens in context + + + - Use Go's context package for cancellation and deadlines + - Store user-specific data in custom types, not directly in Go context + - Validate all context data before using it for authorization + - Consider using middleware for consistent context handling + + + - Validate context data in your context provider + - Use framework-specific security best practices + - Keep context data minimal and focused on the current request + - Implement proper error handling for missing or invalid context + + + +### Performance considerations + + + + - Keep context objects small to minimize memory usage + - Avoid storing large objects or functions in context + - Use context for metadata, not for large data payloads + + + - Use Go context values sparingly - prefer explicit parameters + - Don't store large objects in Go context + - Use context primarily for request-scoped metadata + + + - Keep context data minimal and request-scoped + - Avoid expensive operations in context providers + - Cache context data when appropriate + + + +## Next steps + +- Learn about [creating flows](/unified-docs/creating-flows) to build workflows that use context +- Explore [tool calling](/unified-docs/tool-calling) to understand how context propagates to tools +- See [developer tools](/unified-docs/developer-tools) for debugging context-aware applications +- Check out [generating content](/unified-docs/generating-content) for using context in generation calls diff --git a/src/content/docs/unified-docs/deployment.mdx b/src/content/docs/unified-docs/deployment.mdx new file mode 100644 index 00000000..b83c6ed6 --- /dev/null +++ b/src/content/docs/unified-docs/deployment.mdx @@ -0,0 +1,1384 @@ +--- +title: Deployment Guide +description: Learn how to deploy Genkit applications to production across JavaScript, Go, and Python, including cloud platforms, containerization, environment configuration, and best practices. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +This comprehensive guide covers deploying Genkit applications to production environments across all supported languages and platforms, including cloud services, containerization, and deployment best practices. 
+ +## Overview + +Genkit applications can be deployed to various platforms depending on your language choice and requirements: + +### Platform Options by Language + + + + **Recommended Platforms:** + - **Vercel** - Optimal for Next.js applications + - **Google Cloud Run** - Serverless containers with auto-scaling + - **Firebase Functions** - Serverless functions with Firebase integration + - **AWS Lambda** - Serverless functions with AWS ecosystem + - **Railway** - Simple deployment with Git integration + - **Render** - Easy deployment with automatic builds + - **DigitalOcean App Platform** - Managed platform with databases + - **Heroku** - Traditional PaaS with add-ons + - **Self-hosted** - Docker containers on any infrastructure + + + **Recommended Platforms:** + - **Google Cloud Run** - Serverless containers (recommended) + - **AWS ECS/Fargate** - Managed container service + - **Azure Container Instances** - Serverless containers + - **Kubernetes** - Self-managed or managed (GKE, EKS, AKS) + - **Railway** - Simple deployment with Git integration + - **Render** - Docker-based deployment + - **DigitalOcean App Platform** - Managed platform + - **Self-hosted** - Binary deployment or Docker containers + + + **Recommended Platforms:** + - **Google Cloud Run** - Serverless containers (recommended) + - **AWS Lambda** - Serverless functions (with FastAPI adapter) + - **Azure Functions** - Serverless functions + - **Railway** - Simple deployment with Git integration + - **Render** - Docker-based deployment + - **DigitalOcean App Platform** - Managed platform + - **Heroku** - Traditional PaaS + - **Self-hosted** - Docker containers or direct deployment + + + +## Environment Configuration + +### Environment Variables + + + + Create environment configuration for different deployment stages: + + ```bash + # .env.local (development) + GEMINI_API_KEY=your_development_key + OPENAI_API_KEY=your_development_key + JWT_SECRET=your_jwt_secret + NODE_ENV=development + DATABASE_URL=your_dev_database_url + + # .env.production (production) + GEMINI_API_KEY=your_production_key + OPENAI_API_KEY=your_production_key + JWT_SECRET=your_strong_production_secret + NODE_ENV=production + DATABASE_URL=your_production_database_url + ``` + + ### Next.js Configuration + + ```js + // next.config.js + /** @type {import('next').NextConfig} */ + const nextConfig = { + env: { + CUSTOM_KEY: process.env.CUSTOM_KEY, + }, + experimental: { + serverComponentsExternalPackages: ['genkit'], + }, + }; + + module.exports = nextConfig; + ``` + + ### Express Configuration + + ```ts + // src/config/environment.ts + export const config = { + port: process.env.PORT || 3000, + nodeEnv: process.env.NODE_ENV || 'development', + geminiApiKey: process.env.GEMINI_API_KEY, + openaiApiKey: process.env.OPENAI_API_KEY, + jwtSecret: process.env.JWT_SECRET, + databaseUrl: process.env.DATABASE_URL, + corsOrigins: process.env.CORS_ORIGINS?.split(',') || ['http://localhost:3000'], + }; + + // Validate required environment variables + const requiredEnvVars = ['GEMINI_API_KEY', 'JWT_SECRET']; + for (const envVar of requiredEnvVars) { + if (!process.env[envVar]) { + throw new Error(`Missing required environment variable: ${envVar}`); + } + } + ``` + + + Manage environment configuration with proper validation: + + ```go + // config/config.go + package config + + import ( + "fmt" + "os" + "strconv" + "strings" + ) + + type Config struct { + Port string + Environment string + GeminiAPIKey string + OpenAIAPIKey string + JWTSecret string + DatabaseURL string + 
CORSOrigins []string + } + + func Load() (*Config, error) { + config := &Config{ + Port: getEnv("PORT", "8080"), + Environment: getEnv("GO_ENV", "development"), + GeminiAPIKey: os.Getenv("GEMINI_API_KEY"), + OpenAIAPIKey: os.Getenv("OPENAI_API_KEY"), + JWTSecret: os.Getenv("JWT_SECRET"), + DatabaseURL: os.Getenv("DATABASE_URL"), + CORSOrigins: strings.Split(getEnv("CORS_ORIGINS", "http://localhost:3000"), ","), + } + + // Validate required environment variables + if config.GeminiAPIKey == "" { + return nil, fmt.Errorf("GEMINI_API_KEY is required") + } + if config.JWTSecret == "" { + return nil, fmt.Errorf("JWT_SECRET is required") + } + + return config, nil + } + + func getEnv(key, defaultValue string) string { + if value := os.Getenv(key); value != "" { + return value + } + return defaultValue + } + ``` + + ### Usage in main.go + + ```go + // main.go + package main + + import ( + "log" + "your-app/config" + "your-app/server" + ) + + func main() { + cfg, err := config.Load() + if err != nil { + log.Fatalf("Failed to load configuration: %v", err) + } + + srv := server.New(cfg) + log.Printf("Server starting on port %s", cfg.Port) + log.Fatal(srv.Start()) + } + ``` + + + Use Pydantic for environment configuration: + + ```python + # config/settings.py + from pydantic import BaseSettings, validator + from typing import List, Optional + import os + + class Settings(BaseSettings): + # Server configuration + port: int = 8080 + host: str = "0.0.0.0" + environment: str = "development" + + # API Keys + gemini_api_key: str + openai_api_key: Optional[str] = None + jwt_secret: str + + # Database + database_url: Optional[str] = None + + # CORS + cors_origins: List[str] = ["http://localhost:3000"] + + @validator('cors_origins', pre=True) + def parse_cors_origins(cls, v): + if isinstance(v, str): + return v.split(',') + return v + + @validator('jwt_secret') + def validate_jwt_secret(cls, v): + if len(v) < 32: + raise ValueError('JWT_SECRET must be at least 32 characters long') + return v + + class Config: + env_file = ".env" + case_sensitive = False + + # Create global settings instance + settings = Settings() + ``` + + ### Usage in FastAPI + + ```python + # main.py + from fastapi import FastAPI + from fastapi.middleware.cors import CORSMiddleware + from config.settings import settings + import uvicorn + + app = FastAPI( + title="Genkit API", + version="1.0.0", + debug=settings.environment == "development" + ) + + # CORS middleware + app.add_middleware( + CORSMiddleware, + allow_origins=settings.cors_origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + + if __name__ == "__main__": + uvicorn.run( + "main:app", + host=settings.host, + port=settings.port, + reload=settings.environment == "development" + ) + ``` + + + +## Containerization with Docker + +### Dockerfile Examples + + + + ### Next.js Application + + ```dockerfile + # Dockerfile + FROM node:18-alpine AS base + + # Install dependencies only when needed + FROM base AS deps + RUN apk add --no-cache libc6-compat + WORKDIR /app + + # Install dependencies based on the preferred package manager + COPY package.json yarn.lock* package-lock.json* pnpm-lock.yaml* ./ + RUN \ + if [ -f yarn.lock ]; then yarn --frozen-lockfile; \ + elif [ -f package-lock.json ]; then npm ci; \ + elif [ -f pnpm-lock.yaml ]; then yarn global add pnpm && pnpm i --frozen-lockfile; \ + else echo "Lockfile not found." 
&& exit 1; \ + fi + + # Rebuild the source code only when needed + FROM base AS builder + WORKDIR /app + COPY --from=deps /app/node_modules ./node_modules + COPY . . + + # Build the application + RUN yarn build + + # Production image, copy all the files and run next + FROM base AS runner + WORKDIR /app + + ENV NODE_ENV production + + RUN addgroup --system --gid 1001 nodejs + RUN adduser --system --uid 1001 nextjs + + COPY --from=builder /app/public ./public + + # Set the correct permission for prerender cache + RUN mkdir .next + RUN chown nextjs:nodejs .next + + # Automatically leverage output traces to reduce image size + COPY --from=builder --chown=nextjs:nodejs /app/.next/standalone ./ + COPY --from=builder --chown=nextjs:nodejs /app/.next/static ./.next/static + + USER nextjs + + EXPOSE 3000 + + ENV PORT 3000 + ENV HOSTNAME "0.0.0.0" + + CMD ["node", "server.js"] + ``` + + ### Express Application + + ```dockerfile + # Dockerfile + FROM node:18-alpine AS builder + + WORKDIR /app + + # Copy package files + COPY package*.json ./ + COPY tsconfig.json ./ + + # Install dependencies + RUN npm ci --only=production + + # Copy source code + COPY src/ ./src/ + + # Build the application + RUN npm run build + + # Production stage + FROM node:18-alpine AS production + + WORKDIR /app + + # Create non-root user + RUN addgroup -g 1001 -S nodejs + RUN adduser -S genkit -u 1001 + + # Copy built application + COPY --from=builder /app/dist ./dist + COPY --from=builder /app/node_modules ./node_modules + COPY --from=builder /app/package*.json ./ + + # Change ownership + RUN chown -R genkit:nodejs /app + USER genkit + + EXPOSE 3000 + + CMD ["node", "dist/index.js"] + ``` + + + ### Multi-stage Go Build + + ```dockerfile + # Dockerfile + FROM golang:1.21-alpine AS builder + + # Install git and ca-certificates + RUN apk update && apk add --no-cache git ca-certificates tzdata && update-ca-certificates + + # Create appuser + ENV USER=appuser + ENV UID=10001 + RUN adduser \ + --disabled-password \ + --gecos "" \ + --home "/nonexistent" \ + --shell "/sbin/nologin" \ + --no-create-home \ + --uid "${UID}" \ + "${USER}" + + WORKDIR /build + + # Copy go mod files + COPY go.mod go.sum ./ + + # Download dependencies + RUN go mod download + RUN go mod verify + + # Copy source code + COPY . . + + # Build the binary + RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \ + -ldflags='-w -s -extldflags "-static"' \ + -a -installsuffix cgo \ + -o app . + + # Final stage + FROM scratch + + # Import from builder + COPY --from=builder /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ + COPY --from=builder /usr/share/zoneinfo /usr/share/zoneinfo + COPY --from=builder /etc/passwd /etc/passwd + COPY --from=builder /etc/group /etc/group + + # Copy the binary + COPY --from=builder /build/app /app + + # Use an unprivileged user + USER appuser:appuser + + EXPOSE 8080 + + ENTRYPOINT ["/app"] + ``` + + ### Alternative with Alpine Base + + ```dockerfile + # Dockerfile.alpine + FROM golang:1.21-alpine AS builder + + WORKDIR /app + + # Install dependencies + RUN apk add --no-cache git + + # Copy go mod files + COPY go.mod go.sum ./ + RUN go mod download + + # Copy source and build + COPY . . + RUN CGO_ENABLED=0 GOOS=linux go build -o main . + + # Final stage + FROM alpine:latest + + # Install ca-certificates + RUN apk --no-cache add ca-certificates + + WORKDIR /root/ + + # Copy the binary + COPY --from=builder /app/main . 
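+    # Statically linked binary (CGO_ENABLED=0 above), so no extra runtime libraries are needed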
+ + EXPOSE 8080 + + CMD ["./main"] + ``` + + + ### FastAPI Application + + ```dockerfile + # Dockerfile + FROM python:3.11-slim AS builder + + # Set environment variables + ENV PYTHONDONTWRITEBYTECODE=1 + ENV PYTHONUNBUFFERED=1 + + # Install system dependencies + RUN apt-get update \ + && apt-get install -y --no-install-recommends \ + build-essential \ + curl \ + && rm -rf /var/lib/apt/lists/* + + # Create and activate virtual environment + RUN python -m venv /opt/venv + ENV PATH="/opt/venv/bin:$PATH" + + # Copy requirements and install Python dependencies + COPY requirements.txt . + RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir -r requirements.txt + + # Production stage + FROM python:3.11-slim AS production + + # Set environment variables + ENV PYTHONDONTWRITEBYTECODE=1 + ENV PYTHONUNBUFFERED=1 + ENV PATH="/opt/venv/bin:$PATH" + + # Create non-root user + RUN groupadd -r genkit && useradd -r -g genkit genkit + + # Copy virtual environment from builder stage + COPY --from=builder /opt/venv /opt/venv + + # Set work directory + WORKDIR /app + + # Copy application code + COPY . . + + # Change ownership + RUN chown -R genkit:genkit /app + USER genkit + + EXPOSE 8080 + + CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] + ``` + + ### Flask Application + + ```dockerfile + # Dockerfile + FROM python:3.11-slim + + # Set environment variables + ENV PYTHONDONTWRITEBYTECODE=1 + ENV PYTHONUNBUFFERED=1 + ENV FLASK_APP=app.py + ENV FLASK_ENV=production + + # Set work directory + WORKDIR /app + + # Install system dependencies + RUN apt-get update \ + && apt-get install -y --no-install-recommends gcc \ + && rm -rf /var/lib/apt/lists/* + + # Install Python dependencies + COPY requirements.txt . + RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir -r requirements.txt + + # Copy application code + COPY . . + + # Create non-root user + RUN adduser --disabled-password --gecos '' genkit + RUN chown -R genkit:genkit /app + USER genkit + + EXPOSE 8080 + + CMD ["gunicorn", "--bind", "0.0.0.0:8080", "--workers", "4", "app:app"] + ``` + + + +### Docker Compose for Development + +```yaml +# docker-compose.yml +version: '3.8' + +services: + app: + build: . 
+ ports: + - "3000:3000" # or 8080:8080 for Go/Python + environment: + - NODE_ENV=development + - GEMINI_API_KEY=${GEMINI_API_KEY} + - DATABASE_URL=postgresql://user:password@db:5432/genkit_dev + depends_on: + - db + - redis + volumes: + - .:/app + - /app/node_modules # for Node.js + + db: + image: postgres:15 + environment: + - POSTGRES_DB=genkit_dev + - POSTGRES_USER=user + - POSTGRES_PASSWORD=password + ports: + - "5432:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + + redis: + image: redis:7-alpine + ports: + - "6379:6379" + +volumes: + postgres_data: +``` + +## Cloud Platform Deployment + +### Google Cloud Run + + + + ### Deploy with Cloud Build + + ```yaml + # cloudbuild.yaml + steps: + # Build the container image + - name: 'gcr.io/cloud-builders/docker' + args: ['build', '-t', 'gcr.io/$PROJECT_ID/genkit-app:$COMMIT_SHA', '.'] + + # Push the container image to Container Registry + - name: 'gcr.io/cloud-builders/docker' + args: ['push', 'gcr.io/$PROJECT_ID/genkit-app:$COMMIT_SHA'] + + # Deploy container image to Cloud Run + - name: 'gcr.io/google.com/cloudsdktool/cloud-sdk' + entrypoint: gcloud + args: + - 'run' + - 'deploy' + - 'genkit-app' + - '--image' + - 'gcr.io/$PROJECT_ID/genkit-app:$COMMIT_SHA' + - '--region' + - 'us-central1' + - '--platform' + - 'managed' + - '--allow-unauthenticated' + + images: + - 'gcr.io/$PROJECT_ID/genkit-app:$COMMIT_SHA' + ``` + + ### Deploy with CLI + + ```bash + # Build and deploy + gcloud builds submit --tag gcr.io/PROJECT_ID/genkit-app + + # Deploy to Cloud Run + gcloud run deploy genkit-app \ + --image gcr.io/PROJECT_ID/genkit-app \ + --platform managed \ + --region us-central1 \ + --allow-unauthenticated \ + --set-env-vars GEMINI_API_KEY=your_key,NODE_ENV=production + ``` + + + ### Deploy with CLI + + ```bash + # Build and deploy + gcloud builds submit --tag gcr.io/PROJECT_ID/genkit-go-app + + # Deploy to Cloud Run + gcloud run deploy genkit-go-app \ + --image gcr.io/PROJECT_ID/genkit-go-app \ + --platform managed \ + --region us-central1 \ + --allow-unauthenticated \ + --port 8080 \ + --set-env-vars GEMINI_API_KEY=your_key,GO_ENV=production + ``` + + ### Cloud Run Service Configuration + + ```yaml + # service.yaml + apiVersion: serving.knative.dev/v1 + kind: Service + metadata: + name: genkit-go-app + annotations: + run.googleapis.com/ingress: all + spec: + template: + metadata: + annotations: + autoscaling.knative.dev/maxScale: "100" + run.googleapis.com/cpu-throttling: "false" + spec: + containerConcurrency: 80 + containers: + - image: gcr.io/PROJECT_ID/genkit-go-app + ports: + - containerPort: 8080 + env: + - name: GEMINI_API_KEY + valueFrom: + secretKeyRef: + name: genkit-secrets + key: gemini-api-key + resources: + limits: + cpu: 1000m + memory: 512Mi + ``` + + + ### Deploy with CLI + + ```bash + # Build and deploy + gcloud builds submit --tag gcr.io/PROJECT_ID/genkit-python-app + + # Deploy to Cloud Run + gcloud run deploy genkit-python-app \ + --image gcr.io/PROJECT_ID/genkit-python-app \ + --platform managed \ + --region us-central1 \ + --allow-unauthenticated \ + --port 8080 \ + --set-env-vars GEMINI_API_KEY=your_key,ENVIRONMENT=production + ``` + + ### Requirements for Cloud Run + + ```txt + # requirements.txt + fastapi==0.104.1 + uvicorn[standard]==0.24.0 + genkit-plugin-google-genai==0.1.0 + pydantic==2.5.0 + python-multipart==0.0.6 + ``` + + + +### Vercel (JavaScript/Next.js) + +```bash +# Install Vercel CLI +npm install -g vercel + +# Deploy +vercel + +# Set environment variables +vercel env add GEMINI_API_KEY +vercel 
env add JWT_SECRET + +# Deploy to production +vercel --prod +``` + +### Vercel Configuration + +```json +{ + "version": 2, + "builds": [ + { + "src": "package.json", + "use": "@vercel/next" + } + ], + "env": { + "GEMINI_API_KEY": "@gemini-api-key", + "JWT_SECRET": "@jwt-secret" + }, + "functions": { + "app/api/**/*.ts": { + "maxDuration": 30 + } + } +} +``` + +### AWS Lambda + + + + ### Serverless Framework + + ```yaml + # serverless.yml + service: genkit-app + + provider: + name: aws + runtime: nodejs18.x + region: us-east-1 + environment: + GEMINI_API_KEY: ${env:GEMINI_API_KEY} + JWT_SECRET: ${env:JWT_SECRET} + + functions: + api: + handler: dist/lambda.handler + events: + - http: + path: /{proxy+} + method: ANY + cors: true + + plugins: + - serverless-offline + ``` + + ### Lambda Handler + + ```ts + // src/lambda.ts + import { createServer, proxy } from 'aws-serverless-express'; + import { Context, APIGatewayProxyEvent } from 'aws-lambda'; + import app from './app'; + + const server = createServer(app); + + export const handler = (event: APIGatewayProxyEvent, context: Context) => { + return proxy(server, event, context, 'PROMISE').promise; + }; + ``` + + + ### AWS SAM Template + + ```yaml + # template.yaml + AWSTemplateFormatVersion: '2010-09-09' + Transform: AWS::Serverless-2016-10-31 + + Globals: + Function: + Timeout: 30 + Environment: + Variables: + GEMINI_API_KEY: !Ref GeminiApiKey + + Parameters: + GeminiApiKey: + Type: String + NoEcho: true + + Resources: + GenkitApi: + Type: AWS::Serverless::Function + Properties: + CodeUri: src/ + Handler: lambda_handler.handler + Runtime: python3.11 + Events: + Api: + Type: Api + Properties: + Path: /{proxy+} + Method: ANY + ``` + + ### Lambda Handler + + ```python + # lambda_handler.py + from mangum import Mangum + from main import app + + handler = Mangum(app) + ``` + + + +## Production Best Practices + +### Security + + + + ```ts + // Security middleware for Express + import helmet from 'helmet'; + import rateLimit from 'express-rate-limit'; + import cors from 'cors'; + + const app = express(); + + // Security headers + app.use(helmet({ + contentSecurityPolicy: { + directives: { + defaultSrc: ["'self'"], + scriptSrc: ["'self'", "'unsafe-inline'"], + styleSrc: ["'self'", "'unsafe-inline'"], + imgSrc: ["'self'", "data:", "https:"], + }, + }, + })); + + // Rate limiting + const limiter = rateLimit({ + windowMs: 15 * 60 * 1000, // 15 minutes + max: 100, // limit each IP to 100 requests per windowMs + message: 'Too many requests from this IP', + standardHeaders: true, + legacyHeaders: false, + }); + app.use('/api/', limiter); + + // CORS configuration + app.use(cors({ + origin: process.env.ALLOWED_ORIGINS?.split(',') || ['http://localhost:3000'], + credentials: true, + optionsSuccessStatus: 200, + })); + ``` + + + ```go + // Security middleware for Gin + import ( + "github.com/gin-contrib/cors" + "github.com/gin-contrib/secure" + "golang.org/x/time/rate" + ) + + func setupSecurity(r *gin.Engine) { + // Security headers + r.Use(secure.New(secure.Config{ + SSLRedirect: true, + STSSeconds: 31536000, + STSIncludeSubdomains: true, + FrameDeny: true, + ContentTypeNosniff: true, + BrowserXssFilter: true, + })) + + // CORS + r.Use(cors.New(cors.Config{ + AllowOrigins: []string{"https://yourdomain.com"}, + AllowMethods: []string{"GET", "POST", "PUT", "DELETE"}, + AllowHeaders: []string{"Origin", "Content-Type", "Authorization"}, + ExposeHeaders: []string{"Content-Length"}, + AllowCredentials: true, + MaxAge: 12 * time.Hour, + })) + + // Rate limiting + 
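+	// Note: this limiter is shared by every client; rate.Every(time.Minute) refills
+	// one token per minute with a burst of 60. Use per-IP limiters for fairer throttling.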
limiter := rate.NewLimiter(rate.Every(time.Minute), 60) + r.Use(func(c *gin.Context) { + if !limiter.Allow() { + c.JSON(429, gin.H{"error": "Rate limit exceeded"}) + c.Abort() + return + } + c.Next() + }) + } + ``` + + + ```python + # Security middleware for FastAPI + from fastapi import FastAPI + from fastapi.middleware.cors import CORSMiddleware + from fastapi.middleware.trustedhost import TrustedHostMiddleware + from slowapi import Limiter, _rate_limit_exceeded_handler + from slowapi.util import get_remote_address + from slowapi.errors import RateLimitExceeded + + app = FastAPI() + + # Rate limiting + limiter = Limiter(key_func=get_remote_address) + app.state.limiter = limiter + app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) + + # Trusted hosts + app.add_middleware( + TrustedHostMiddleware, + allowed_hosts=["yourdomain.com", "*.yourdomain.com"] + ) + + # CORS + app.add_middleware( + CORSMiddleware, + allow_origins=["https://yourdomain.com"], + allow_credentials=True, + allow_methods=["GET", "POST", "PUT", "DELETE"], + allow_headers=["*"], + ) + + @app.get("/api/endpoint") + @limiter.limit("60/minute") + async def protected_endpoint(request: Request): + return {"message": "Protected endpoint"} + ``` + + + +### Monitoring and Logging + + + + ```ts + // Structured logging with Winston + import winston from 'winston'; + + const logger = winston.createLogger({ + level: process.env.LOG_LEVEL || 'info', + format: winston.format.combine( + winston.format.timestamp(), + winston.format.errors({ stack: true }), + winston.format.json() + ), + defaultMeta: { service: 'genkit-app' }, + transports: [ + new winston.transports.File({ filename: 'error.log', level: 'error' }), + new winston.transports.File({ filename: 'combined.log' }), + ], + }); + + if (process.env.NODE_ENV !== 'production') { + logger.add(new winston.transports.Console({ + format: winston.format.simple() + })); + } + + // Request logging middleware + app.use((req, res, next) => { + const start = Date.now(); + res.on('finish', () => { + const duration = Date.now() - start; + logger.info('HTTP Request', { + method: req.method, + url: req.url, + status: res.statusCode, + duration, + userAgent: req.get('User-Agent'), + }); + }); + next(); + }); + ``` + + + ```go + // Structured logging with logrus + import ( + "github.com/sirupsen/logrus" + "github.com/gin-gonic/gin" + "time" + ) + + func setupLogging() *logrus.Logger { + logger := logrus.New() + + if os.Getenv("GO_ENV") == "production" { + logger.SetFormatter(&logrus.JSONFormatter{}) + logger.SetLevel(logrus.InfoLevel) + } else { + logger.SetFormatter(&logrus.TextFormatter{}) + logger.SetLevel(logrus.DebugLevel) + } + + return logger + } + + // Request logging middleware + func LoggingMiddleware(logger *logrus.Logger) gin.HandlerFunc { + return gin.LoggerWithFormatter(func(param gin.LogFormatterParams) string { + logger.WithFields(logrus.Fields{ + "method": param.Method, + "path": param.Path, + "status": param.StatusCode, + "duration": param.Latency, + "client_ip": param.ClientIP, + "user_agent": param.Request.UserAgent(), + }).Info("HTTP Request") + return "" + }) + } + ``` + + + ```python + # Structured logging with structlog + import structlog + import logging + from fastapi import Request + import time + + # Configure structured logging + structlog.configure( + processors=[ + structlog.stdlib.filter_by_level, + structlog.stdlib.add_logger_name, + structlog.stdlib.add_log_level, + structlog.stdlib.PositionalArgumentsFormatter(), + 
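+            # Add ISO timestamps, stack info, and formatted exceptions to each event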
structlog.processors.TimeStamper(fmt="iso"), + structlog.processors.StackInfoRenderer(), + structlog.processors.format_exc_info, + structlog.processors.UnicodeDecoder(), + structlog.processors.JSONRenderer() + ], + context_class=dict, + logger_factory=structlog.stdlib.LoggerFactory(), + wrapper_class=structlog.stdlib.BoundLogger, + cache_logger_on_first_use=True, + ) + + logger = structlog.get_logger() + + # Request logging middleware + @app.middleware("http") + async def log_requests(request: Request, call_next): + start_time = time.time() + response = await call_next(request) + process_time = time.time() - start_time + + logger.info( + "HTTP Request", + method=request.method, + url=str(request.url), + status_code=response.status_code, + process_time=process_time, + client_ip=request.client.host, + user_agent=request.headers.get("user-agent"), + ) + + return response + ``` + + + +### Health Checks + + + + ```ts + // Health check endpoint + app.get('/health', async (req, res) => { + const healthCheck = { + uptime: process.uptime(), + message: 'OK', + timestamp: Date.now(), + checks: { + database: 'OK', + redis: 'OK', + external_api: 'OK', + }, + }; + + try { + // Check database connection + await db.ping(); + + // Check external API + const apiResponse = await fetch('https://api.example.com/health'); + if (!apiResponse.ok) { + healthCheck.checks.external_api = 'FAIL'; + } + + res.status(200).json(healthCheck); + } catch (error) { + healthCheck.message = 'ERROR'; + healthCheck.checks.database = 'FAIL'; + res.status(503).json(healthCheck); + } + }); + ``` + + + ```go + // Health check handler + func healthCheck(c *gin.Context) { + healthStatus := gin.H{ + "status": "OK", + "timestamp": time.Now().Unix(), + "uptime": time.Since(startTime).Seconds(), + "checks": gin.H{ + "database": "OK", + "external_api": "OK", + }, + } + + // Check database connection + if err := db.Ping(); err != nil { + healthStatus["checks"].(gin.H)["database"] = "FAIL" + healthStatus["status"] = "ERROR" + c.JSON(503, healthStatus) + return + } + + // Check external API + resp, err := http.Get("https://api.example.com/health") + if err != nil || resp.StatusCode != 200 { + healthStatus["checks"].(gin.H)["external_api"] = "FAIL" + } + + c.JSON(200, healthStatus) + } + ``` + + + ```python + # Health check endpoint + import time + import httpx + from datetime import datetime + + start_time = time.time() + + @app.get("/health") + async def health_check(): + health_status = { + "status": "OK", + "timestamp": datetime.utcnow().isoformat(), + "uptime": time.time() - start_time, + "checks": { + "database": "OK", + "external_api": "OK", + }, + } + + try: + # Check database connection + await database.execute("SELECT 1") + + # Check external API + async with httpx.AsyncClient() as client: + response = await client.get("https://api.example.com/health") + if response.status_code != 200: + health_status["checks"]["external_api"] = "FAIL" + + return health_status + except Exception as e: + health_status["status"] = "ERROR" + health_status["checks"]["database"] = "FAIL" + raise HTTPException(status_code=503, detail=health_status) + ``` + + + +## Performance Optimization + +### Caching Strategies + + + + ```ts + // Redis caching + import Redis from 'ioredis'; + + const redis = new Redis(process.env.REDIS_URL); + + // Cache middleware + const cache = (duration: number) => { + return async (req: Request, res: Response, next: NextFunction) => { + const key = `cache:${req.originalUrl}`; + + try { + const cached = await redis.get(key); + if 
(cached) { + return res.json(JSON.parse(cached)); + } + + // Override res.json to cache the response + const originalJson = res.json; + res.json = function(data) { + redis.setex(key, duration, JSON.stringify(data)); + return originalJson.call(this, data); + }; + + next(); + } catch (error) { + next(); + } + }; + }; + + // Usage + app.get('/api/expensive-operation', cache(300), async (req, res) => { + const result = await performExpensiveOperation(); + res.json(result); + }); + ``` + + + ```go + // Redis caching with go-redis + import ( + "github.com/go-redis/redis/v8" + "encoding/json" + "time" + ) + + func cacheMiddleware(rdb *redis.Client, duration time.Duration) gin.HandlerFunc { + return func(c *gin.Context) { + key := "cache:" + c.Request.URL.Path + + // Try to get from cache + cached, err := rdb.Get(c, key).Result() + if err == nil { + var data interface{} + if json.Unmarshal([]byte(cached), &data) == nil { + c.JSON(200, data) + c.Abort() + return + } + } + + // Continue to handler + c.Next() + + // Cache the response if successful + if c.Writer.Status() == 200 { + if data, exists := c.Get("response"); exists { + if jsonData, err := json.Marshal(data); err == nil { + rdb.Set(c, key, jsonData, duration) + } + } + } + } + } + ``` + + + ```python + # Redis caching with aioredis + import aioredis + import json + from functools import wraps + + redis = aioredis.from_url("redis://localhost") + + def cache(duration: int): + def decorator(func): + @wraps(func) + async def wrapper(*args, **kwargs): + # Generate cache key + key = f"cache:{func.__name__}:{hash(str(args) + str(kwargs))}" + + # Try to get from cache + cached = await redis.get(key) + if cached: + return json.loads(cached) + + # Execute function + result = await func(*args, **kwargs) + + # Cache result + await redis.setex(key, duration, json.dumps(result)) + + return result + return wrapper + return decorator + + @app.get("/api/expensive-operation") + @cache(300) # Cache for 5 minutes + async def expensive_operation(): + # Perform expensive operation + result = await perform_expensive_operation() + return result + ``` + + + +## Next Steps + +- Learn about [observability and monitoring](/unified-docs/observability-monitoring) to track your deployed applications +- Explore [error handling](/unified-docs/error-handling) for robust production applications +- See [creating flows](/unified-docs/creating-flows) to build scalable AI workflows +- Check out [framework integrations](/unified-docs/frameworks/express) for specific deployment patterns diff --git a/src/content/docs/unified-docs/developer-tools.mdx b/src/content/docs/unified-docs/developer-tools.mdx new file mode 100644 index 00000000..577bdc16 --- /dev/null +++ b/src/content/docs/unified-docs/developer-tools.mdx @@ -0,0 +1,309 @@ +--- +title: Developer Tools +description: Explore Genkit's developer tools, including the CLI for command-line operations and the local web-based Developer UI for interactive testing and development across JavaScript, Go, and Python. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Genkit provides powerful developer tools that work consistently across JavaScript, Go, and Python: + +- A command-line interface (CLI) for project operations +- A local web-based Developer UI for interactive testing and development +- Monitoring and observability features + +## Command Line Interface (CLI) + +The Genkit CLI provides essential commands for working with your AI applications. 
The CLI is shared across all languages and works the same way regardless of your implementation language. + +### Installation + +Install the CLI globally using npm (works for all languages): + +```bash +npm install -g genkit-cli +``` + +### Starting the Developer UI + +The core pattern is the same across all languages - provide an entrypoint command to run your application: + + + + ```bash + # Start the developer UI with your application + genkit start -- + + # Common examples: + genkit start -- npm run dev + genkit start -- npx tsx --watch src/index.ts + genkit start -- node --watch src/index.js + + # Auto-open in browser + genkit start -o -- npm run dev + ``` + + + ```bash + # Start the developer UI with your Go application + genkit start -- go run . + genkit start -- go run main.go + + # Auto-open in browser + genkit start -o -- go run . + ``` + + + ```bash + # Start the developer UI with your Python application + genkit start -- python app.py + genkit start -- python main.py + genkit start -- python -m your_module + + # Auto-open in browser + genkit start -o -- python app.py + ``` + + + +After running the command, you'll see output like: + +```bash +Telemetry API running on http://localhost:4033 +Genkit Developer UI: http://localhost:4000 +``` + +Open `http://localhost:4000` in your browser to access the Developer UI. + +### Running Flows + +Execute flows directly from the command line (works the same across all languages): + +```bash +# Run a specified flow (your runtime must be running with GENKIT_ENV=dev) +genkit flow:run + +# Run with input data +genkit flow:run myFlow '{"input": "data"}' + +# Batch run flows +genkit flow:batchRun +``` + +### Evaluation Commands + + + + ```bash + # Evaluate a specific flow + genkit eval:flow + + # Evaluate with dataset + genkit eval:flow myFlow --input myDataset.json + + # Extract evaluation data + genkit eval:extractData --output results.json + + # Run evaluation on extracted data + genkit eval:run results.json + ``` + + + ```bash + # Evaluate a specific flow + genkit eval:flow + + # Evaluate with dataset + genkit eval:flow myFlow --input myDataset.json + + # Extract evaluation data + genkit eval:extractData --maxRows 100 --output results.json + + # Run evaluation on extracted data + genkit eval:run results.json + ``` + + + Evaluation commands are not yet available for Python. Use external evaluation tools or the Developer UI for testing. + + + +### Configuration Commands + +```bash +# View all available commands +genkit --help + +# Configure analytics opt-out +genkit config set analyticsOptOut true +genkit config get analyticsOptOut +``` + +## Genkit Developer UI + +The Genkit Developer UI is a local web application that provides an interactive interface for working with models, flows, prompts, and other components. **The UI works identically across all languages** - once your application is running, the interface and features are the same. 
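+
+For the Developer UI to discover your components, your entrypoint only needs to initialize Genkit and define them. A minimal JavaScript sketch (the Go and Python SDKs follow the same shape; `greetFlow` is a hypothetical example):
+
+```ts
+import { genkit, z } from 'genkit';
+import { googleAI } from '@genkit-ai/googleai';
+
+const ai = genkit({ plugins: [googleAI()] });
+
+// Flows defined in the entrypoint show up automatically in the UI's flow runner.
+export const greetFlow = ai.defineFlow(
+  {
+    name: 'greetFlow',
+    inputSchema: z.object({ name: z.string() }),
+    outputSchema: z.object({ greeting: z.string() }),
+  },
+  async ({ name }) => {
+    const { text } = await ai.generate({
+      model: googleAI.model('gemini-2.5-flash'),
+      prompt: `Write a one-line greeting for ${name}.`,
+    });
+    return { greeting: text };
+  },
+);
+```
+
+Run it with `genkit start -- npx tsx --watch src/index.ts` and `greetFlow` appears in the flow runner, with a trace recorded for every run.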
+ +### Features + +The Developer UI provides action runners for various Genkit components: + +- **Flow Runner**: Test and debug your AI workflows +- **Model Runner**: Experiment with different models and parameters +- **Prompt Runner**: Test and iterate on prompts +- **Tool Runner**: Test function calling capabilities +- **Retriever Runner**: Test document retrieval systems +- **Indexer Runner**: Test document indexing operations +- **Embedder Runner**: Test text embedding generation +- **Evaluator Runner**: Run evaluation metrics on your outputs + +![Genkit Developer UI](../../../assets/dev_ui/genkit_dev_ui_home.png) + +### Interactive Testing + +The Developer UI allows you to: + +1. **Test flows interactively** - Run flows with different inputs and see results in real-time +2. **Experiment with models** - Try different model configurations and compare outputs +3. **Iterate on prompts** - Modify prompts and see immediate results +4. **Debug tool calling** - Test function calls and inspect their execution +5. **Analyze traces** - Inspect detailed execution traces for debugging +6. **Run evaluations** - Test your flows against evaluation datasets + +![Genkit Developer UI Overview](/genkit_developer_ui_overview.gif) + +### Trace Inspection + +The Developer UI provides detailed trace inspection for all flow runs, allowing you to: + +- View step-by-step execution details +- Inspect inputs and outputs at each stage +- Analyze performance metrics +- Debug errors and exceptions +- Compare different execution runs + +This trace inspection works consistently across all languages, giving you the same debugging capabilities whether you're using JavaScript, Go, or Python. + +## Monitoring and Observability + +### Development Environment + +When running in development mode (with `GENKIT_ENV=dev` or using `genkit start`), Genkit automatically: + +- Enables trace collection +- Starts the telemetry API on `http://localhost:4033` +- Provides detailed debugging information +- Stores traces locally for inspection + +This works the same way across all languages. + +### OpenTelemetry Integration + + + + Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/) and provides hooks to export telemetry data to various monitoring systems. + + Configure telemetry export in your application: + + ```ts + import { genkit } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [googleAI()], + telemetry: { + instrumentation: 'genkit', + logger: 'genkit', + }, + }); + ``` + + + Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/) and provides hooks to export telemetry data. + + The [Google Cloud plugin](/go/docs/plugins/google-cloud) exports telemetry to Cloud's operations suite. + + ```go + import ( + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{}), + ) + // ... + } + ``` + + + OpenTelemetry integration is available but may have limited features compared to JavaScript and Go implementations. + + + +## Analytics and Privacy + +The Genkit CLI and Developer UI use cookies and similar technologies from Google to deliver and enhance the quality of its services and to analyze usage. [Learn more](https://policies.google.com/technologies/cookies). 
+ +### Opting Out of Analytics + +To opt-out of analytics, run: + +```bash +genkit config set analyticsOptOut true +``` + +You can view the current setting by running: + +```bash +genkit config get analyticsOptOut +``` + +## Best Practices + +### Development Workflow + +1. **Start with the Developer UI** - Use the interactive interface to experiment with models and prompts +2. **Use watch mode** - Include `--watch` flags in your start commands to see changes immediately +3. **Test thoroughly** - Use the flow runner to test different input scenarios +4. **Monitor traces** - Inspect execution traces to understand performance and debug issues +5. **Evaluate regularly** - Use evaluation tools to measure and improve your AI application quality + +### Debugging Tips + + + + - Use `console.log()` statements in your flows - they'll appear in the Developer UI + - Check the trace inspector for detailed execution information + - Use the model runner to isolate model-specific issues + - Test prompts independently before integrating them into flows + + + - Use `log.Printf()` for debugging output + - Check traces in the Developer UI for execution details + - Test individual components before integrating them + - Use the monitoring features to track performance + + + - Use `print()` statements for debugging + - Test components individually in the Developer UI + - Monitor application logs for errors and performance issues + + + +### Common Patterns + +Regardless of your language, the development pattern is consistent: + +1. **Start your application** with `genkit start -- ` +2. **Open the Developer UI** at `http://localhost:4000` +3. **Test components** using the various runners +4. **Inspect traces** to understand execution flow +5. **Iterate and improve** based on results + +## Next Steps + +- Learn about [creating flows](/unified-docs/creating-flows) to build testable AI workflows +- Explore [evaluation](/unified-docs/evaluation) to measure and improve your application quality +- See [generating content](/unified-docs/generating-content) to understand model interactions +- Check out [tool calling](/unified-docs/tool-calling) for building interactive AI agents diff --git a/src/content/docs/unified-docs/error-handling.mdx b/src/content/docs/unified-docs/error-handling.mdx new file mode 100644 index 00000000..e84c4cf9 --- /dev/null +++ b/src/content/docs/unified-docs/error-handling.mdx @@ -0,0 +1,646 @@ +--- +title: Error handling +description: Learn about error handling in Genkit, including specialized error types, best practices, and debugging techniques across JavaScript, Go, and Python. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Proper error handling is crucial for building robust AI applications. Genkit provides different error handling mechanisms and best practices across languages to help you build reliable and secure applications. 
+ +## Availability and Approach + + + + JavaScript provides specialized error types and comprehensive error handling: + - `GenkitError` for internal framework errors + - `UserFacingError` for application-level errors + - Automatic error sanitization in web hosting plugins + - Built-in error tracing and debugging + - Security-focused error handling + + + Go uses standard Go error handling patterns: + - Standard `error` interface for all errors + - Custom error types for specific scenarios + - Error wrapping and unwrapping + - Context-aware error handling + - Structured error information + + + Python uses standard exception handling: + - Built-in exception types + - Custom exception classes + - Try-catch error handling + - Exception chaining and context + - Framework-specific error handling + + + +## Error types and classification + + + + Genkit knows about two specialized types: `GenkitError` and `UserFacingError`. The separation between these two error types helps you better understand where your error is coming from. + + ### GenkitError + + `GenkitError` is intended for use by Genkit itself or Genkit plugins. These represent internal framework errors: + + ```typescript + import { GenkitError } from 'genkit'; + + // Example of a GenkitError (typically thrown by the framework) + throw new GenkitError({ + status: 'INVALID_ARGUMENT', + message: 'Model configuration is invalid', + details: { modelName: 'invalid-model' } + }); + ``` + + ### UserFacingError + + `UserFacingError` is intended for [`ContextProviders`](/docs/deploy-node) and your application code. These represent application-level errors that can be safely shown to users: + + ```typescript + import { UserFacingError } from 'genkit'; + + const myFlow = ai.defineFlow({ + name: 'userFlow', + inputSchema: z.object({ userId: z.string() }), + outputSchema: z.string(), + }, async (input) => { + const user = await getUserById(input.userId); + + if (!user) { + throw new UserFacingError({ + status: 'NOT_FOUND', + message: 'User not found', + }); + } + + return `Hello, ${user.name}!`; + }); + ``` + + ### Error sanitization + + Genkit plugins for web hosting (e.g. [`@genkit-ai/express`](https://js.api.genkit.dev/modules/_genkit-ai_express.html) or [`@genkit-ai/next`](https://js.api.genkit.dev/modules/_genkit-ai_next.html)) SHOULD capture all other Error types and instead report them as an internal error in the response. This adds a layer of security to your application by ensuring that internal details of your application do not leak to attackers. + + ```typescript + // Internal errors are automatically sanitized + const myFlow = ai.defineFlow({ + name: 'secureFlow', + inputSchema: z.string(), + outputSchema: z.string(), + }, async (input) => { + try { + const result = await someInternalOperation(input); + return result; + } catch (error) { + // This will be caught by the web hosting plugin + // and converted to a generic "Internal Error" response + throw new Error('Database connection failed: ' + sensitiveInfo); + } + }); + ``` + + + Go uses the standard error interface for all error handling. 
You can create custom error types for specific scenarios: + + ```go + import ( + "errors" + "fmt" + ) + + // Custom error types + type ValidationError struct { + Field string + Message string + } + + func (e *ValidationError) Error() string { + return fmt.Sprintf("validation error in field %s: %s", e.Field, e.Message) + } + + type NotFoundError struct { + Resource string + ID string + } + + func (e *NotFoundError) Error() string { + return fmt.Sprintf("%s with ID %s not found", e.Resource, e.ID) + } + + // Usage in flows + func myFlow(ctx context.Context, input FlowInput) (string, error) { + if input.UserID == "" { + return "", &ValidationError{ + Field: "userID", + Message: "userID is required", + } + } + + user, err := getUserByID(ctx, input.UserID) + if err != nil { + return "", fmt.Errorf("failed to get user: %w", err) + } + + if user == nil { + return "", &NotFoundError{ + Resource: "User", + ID: input.UserID, + } + } + + return fmt.Sprintf("Hello, %s!", user.Name), nil + } + ``` + + ### Error wrapping + + Use error wrapping to preserve error context: + + ```go + func processData(ctx context.Context, data string) error { + if err := validateData(data); err != nil { + return fmt.Errorf("data validation failed: %w", err) + } + + if err := saveData(ctx, data); err != nil { + return fmt.Errorf("failed to save data: %w", err) + } + + return nil + } + + // Check for specific error types + func handleError(err error) { + var validationErr *ValidationError + if errors.As(err, &validationErr) { + log.Printf("Validation error: %s", validationErr.Message) + return + } + + var notFoundErr *NotFoundError + if errors.As(err, ¬FoundErr) { + log.Printf("Resource not found: %s", notFoundErr.Resource) + return + } + + log.Printf("Unknown error: %v", err) + } + ``` + + + Python uses standard exception handling with custom exception classes: + + ```python + from genkit.types import GenkitError + + # Custom exception classes + class ValidationError(Exception): + def __init__(self, field: str, message: str): + self.field = field + self.message = message + super().__init__(f"Validation error in field {field}: {message}") + + class NotFoundError(Exception): + def __init__(self, resource: str, resource_id: str): + self.resource = resource + self.resource_id = resource_id + super().__init__(f"{resource} with ID {resource_id} not found") + + # Usage in flows + @ai.flow() + async def my_flow(input_data: dict, ctx): + if not input_data.get('user_id'): + raise ValidationError('user_id', 'user_id is required') + + try: + user = await get_user_by_id(input_data['user_id']) + except Exception as e: + raise GenkitError( + status='INTERNAL', + message='Failed to retrieve user' + ) from e + + if not user: + raise NotFoundError('User', input_data['user_id']) + + return f"Hello, {user['name']}!" 
+ + # Error handling + try: + result = await my_flow({'user_id': '123'}, ctx) + except ValidationError as e: + print(f"Validation error: {e.message}") + except NotFoundError as e: + print(f"Resource not found: {e.resource}") + except GenkitError as e: + print(f"Genkit error: {e.message}") + except Exception as e: + print(f"Unexpected error: {e}") + ``` + + + +## Error handling in flows + + + + Implement comprehensive error handling in your flows: + + ```typescript + const robustFlow = ai.defineFlow({ + name: 'robustFlow', + inputSchema: z.object({ + text: z.string(), + options: z.object({ + maxRetries: z.number().default(3), + }).optional(), + }), + outputSchema: z.string(), + }, async (input, { logger }) => { + const maxRetries = input.options?.maxRetries ?? 3; + let lastError: Error | null = null; + + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + logger.info(`Attempt ${attempt} of ${maxRetries}`); + + const result = await ai.generate({ + prompt: `Process this text: ${input.text}`, + }); + + if (!result.text) { + throw new UserFacingError({ + status: 'FAILED_PRECONDITION', + message: 'Generated content is empty', + }); + } + + return result.text; + } catch (error) { + lastError = error as Error; + logger.warn(`Attempt ${attempt} failed:`, { error: error.message }); + + // Don't retry for user-facing errors + if (error instanceof UserFacingError) { + throw error; + } + + // Wait before retrying (exponential backoff) + if (attempt < maxRetries) { + await new Promise(resolve => setTimeout(resolve, Math.pow(2, attempt) * 1000)); + } + } + } + + // All retries failed + throw new UserFacingError({ + status: 'DEADLINE_EXCEEDED', + message: 'Operation failed after multiple attempts', + details: { attempts: maxRetries, lastError: lastError?.message }, + }); + }); + ``` + + + Implement robust error handling with retries and proper error classification: + + ```go + func robustFlow(ctx context.Context, input FlowInput) (string, error) { + maxRetries := 3 + if input.Options != nil && input.Options.MaxRetries > 0 { + maxRetries = input.Options.MaxRetries + } + + var lastErr error + + for attempt := 1; attempt <= maxRetries; attempt++ { + log.Printf("Attempt %d of %d", attempt, maxRetries) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Process this text: " + input.Text), + ) + + if err != nil { + lastErr = err + log.Printf("Attempt %d failed: %v", attempt, err) + + // Check if it's a permanent error (don't retry) + var validationErr *ValidationError + if errors.As(err, &validationErr) { + return "", err // Don't retry validation errors + } + + // Wait before retrying (exponential backoff) + if attempt < maxRetries { + backoff := time.Duration(math.Pow(2, float64(attempt))) * time.Second + select { + case <-time.After(backoff): + continue + case <-ctx.Done(): + return "", ctx.Err() + } + } + continue + } + + if resp.Text() == "" { + return "", &ValidationError{ + Field: "output", + Message: "generated content is empty", + } + } + + return resp.Text(), nil + } + + return "", fmt.Errorf("operation failed after %d attempts, last error: %w", maxRetries, lastErr) + } + ``` + + + Implement comprehensive error handling with retries: + + ```python + import asyncio + from typing import Optional + + @ai.flow() + async def robust_flow(input_data: dict, ctx): + text = input_data.get('text', '') + max_retries = input_data.get('options', {}).get('max_retries', 3) + + last_error = None + + for attempt in range(1, max_retries + 1): + try: + logger.info(f"Attempt {attempt} of 
{max_retries}") + + result = await ai.generate( + prompt=f"Process this text: {text}", + ) + + if not result.text: + raise ValidationError('output', 'generated content is empty') + + return result.text + + except ValidationError: + # Don't retry validation errors + raise + except Exception as e: + last_error = e + logger.warning(f"Attempt {attempt} failed: {e}") + + # Wait before retrying (exponential backoff) + if attempt < max_retries: + backoff = 2 ** attempt + await asyncio.sleep(backoff) + + # All retries failed + raise GenkitError( + status='DEADLINE_EXCEEDED', + message=f'Operation failed after {max_retries} attempts', + details={'last_error': str(last_error)} + ) + ``` + + + +## Error handling in tools + + + + Tools should handle errors gracefully and provide meaningful feedback: + + ```typescript + const databaseTool = ai.defineTool({ + name: 'databaseQuery', + description: 'Query the database for information', + inputSchema: z.object({ + query: z.string(), + table: z.string(), + }), + outputSchema: z.array(z.record(z.any())), + }, async (input, { logger }) => { + try { + // Validate input + if (!input.query.trim()) { + throw new UserFacingError({ + status: 'INVALID_ARGUMENT', + message: 'Query cannot be empty', + }); + } + + // Execute query + const results = await database.query(input.query, input.table); + + logger.info('Database query executed successfully', { + table: input.table, + resultCount: results.length, + }); + + return results; + } catch (error) { + if (error instanceof UserFacingError) { + throw error; + } + + // Log internal errors but don't expose details + logger.error('Database query failed', { + error: error.message, + table: input.table + }); + + throw new UserFacingError({ + status: 'INTERNAL', + message: 'Database query failed', + }); + } + }); + ``` + + + Implement proper error handling in tools: + + ```go + func databaseQueryTool(ctx context.Context, input DatabaseQueryInput) ([]map[string]interface{}, error) { + // Validate input + if strings.TrimSpace(input.Query) == "" { + return nil, &ValidationError{ + Field: "query", + Message: "query cannot be empty", + } + } + + // Execute query + results, err := database.Query(ctx, input.Query, input.Table) + if err != nil { + log.Printf("Database query failed: %v", err) + + // Check for specific database errors + if isDatabaseConnectionError(err) { + return nil, fmt.Errorf("database connection failed: %w", err) + } + + if isDatabaseTimeoutError(err) { + return nil, fmt.Errorf("database query timeout: %w", err) + } + + // Generic database error + return nil, fmt.Errorf("database query failed: %w", err) + } + + log.Printf("Database query executed successfully, returned %d results", len(results)) + return results, nil + } + + // Helper functions to classify database errors + func isDatabaseConnectionError(err error) bool { + // Implementation depends on your database driver + return strings.Contains(err.Error(), "connection") + } + + func isDatabaseTimeoutError(err error) bool { + // Implementation depends on your database driver + return strings.Contains(err.Error(), "timeout") + } + ``` + + + Implement comprehensive error handling in tools: + + ```python + @ai.tool() + def database_query_tool(input_data: dict, ctx) -> list: + """Query the database for information""" + + query = input_data.get('query', '').strip() + table = input_data.get('table', '') + + # Validate input + if not query: + raise ValidationError('query', 'query cannot be empty') + + if not table: + raise ValidationError('table', 'table name is 
required') + + try: + # Execute query + results = database.query(query, table) + + logger.info(f"Database query executed successfully, returned {len(results)} results") + return results + + except DatabaseConnectionError as e: + logger.error(f"Database connection failed: {e}") + raise GenkitError( + status='UNAVAILABLE', + message='Database connection failed' + ) + except DatabaseTimeoutError as e: + logger.error(f"Database query timeout: {e}") + raise GenkitError( + status='DEADLINE_EXCEEDED', + message='Database query timeout' + ) + except Exception as e: + logger.error(f"Database query failed: {e}") + raise GenkitError( + status='INTERNAL', + message='Database query failed' + ) + ``` + + + +## Best practices + +### Error classification + + + + - Use `UserFacingError` for errors that can be safely shown to users + - Use `GenkitError` for internal framework errors + - Let other errors be automatically sanitized by web hosting plugins + - Provide meaningful error messages and status codes + - Include relevant context in error details + + + - Create custom error types for different error categories + - Use error wrapping to preserve error context + - Implement error classification functions + - Provide structured error information + - Use appropriate logging levels for different error types + + + - Create custom exception classes for different error types + - Use exception chaining to preserve error context + - Implement proper exception handling hierarchies + - Provide meaningful error messages + - Use structured logging for error information + + + +### Security considerations + + + + - Never expose internal system details in user-facing errors + - Use error sanitization in production environments + - Log detailed error information for debugging + - Implement rate limiting for error-prone operations + - Validate all inputs to prevent injection attacks + + + - Sanitize error messages before returning to clients + - Use structured logging to separate internal and external error details + - Implement proper input validation + - Use context timeouts to prevent resource exhaustion + - Implement circuit breakers for external dependencies + + + - Sanitize exception messages in production + - Use different exception types for internal vs external errors + - Implement proper input validation and sanitization + - Use async timeouts for long-running operations + - Implement retry logic with exponential backoff + + + +### Debugging and monitoring + + + + - Use the Developer UI to inspect error traces + - Implement comprehensive logging with error context + - Set up error monitoring and alerting + - Use error tracking services for production + - Include correlation IDs for distributed tracing + + + - Use structured logging for better error analysis + - Implement error metrics and monitoring + - Use distributed tracing for complex workflows + - Set up alerting for error rate thresholds + - Include request IDs for error correlation + + + - Use structured logging for error analysis + - Implement error tracking and monitoring + - Use APM tools for error insights + - Set up alerting for error patterns + - Include trace IDs for error correlation + + + +## Next steps + +- Learn about [observability and monitoring](/unified-docs/observability-monitoring) for tracking and debugging errors +- Explore [developer tools](/unified-docs/developer-tools) for debugging error scenarios +- See [creating flows](/unified-docs/creating-flows) for implementing robust error handling in workflows +- Check out [tool 
calling](/unified-docs/tool-calling) for error handling in tool implementations diff --git a/src/content/docs/unified-docs/frameworks/express.mdx b/src/content/docs/unified-docs/frameworks/express.mdx new file mode 100644 index 00000000..9de7a5ad --- /dev/null +++ b/src/content/docs/unified-docs/frameworks/express.mdx @@ -0,0 +1,970 @@ +--- +title: Express.js Integration +description: Learn how to integrate Genkit with Express.js applications across JavaScript, Go, and Python, including REST API endpoints, authentication, and deployment strategies. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; +import { Badge } from '@astrojs/starlight/components'; + + + +The Express.js integration allows you to expose Genkit flows and actions as REST API endpoints, making it easy to integrate AI capabilities into existing Express-based backends or deploy them to any platform that supports Express.js applications. + +:::note[Framework Availability] +Express.js integration is primarily available for JavaScript/Node.js. For other languages, see equivalent frameworks: +- **Go**: [Gin](/unified-docs/frameworks/gin) or standard `net/http` +- **Python**: [Flask](/unified-docs/frameworks/flask) or [FastAPI](/unified-docs/frameworks/fastapi) +::: + +## Installation and Setup + + + + Install the Express plugin: + + ```bash + npm install @genkit-ai/express express + npm install -D @types/express # if using TypeScript + ``` + + Basic setup with Express integration: + + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import { expressHandler } from '@genkit-ai/express'; + import express from 'express'; + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), + }); + + // Define a Genkit flow + const simpleFlow = ai.defineFlow( + { + name: 'simpleFlow', + inputSchema: z.object({ input: z.string() }), + outputSchema: z.object({ output: z.string() }), + }, + async ({ input }, { sendChunk }) => { + const { text } = await ai.generate({ + prompt: input, + onChunk: (c) => sendChunk(c.text), + }); + return { output: text }; + }, + ); + + // Create Express app + const app = express(); + app.use(express.json()); + + // Expose Genkit flow as REST endpoint + app.post('/simpleFlow', expressHandler(simpleFlow)); + + app.listen(8080, () => { + console.log('Express server listening on port 8080'); + }); + ``` + + + For Go applications, use Gin or standard HTTP handlers. 
Here's an equivalent setup: + + ```go + package main + + import ( + "context" + "encoding/json" + "net/http" + "github.com/gin-gonic/gin" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + type FlowInput struct { + Input string `json:"input"` + } + + type FlowOutput struct { + Output string `json:"output"` + } + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + ) + if err != nil { + log.Fatal(err) + } + + // Define flow + simpleFlow := genkit.DefineFlow(g, "simpleFlow", + func(ctx context.Context, input FlowInput) (FlowOutput, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(input.Input), + ) + if err != nil { + return FlowOutput{}, err + } + return FlowOutput{Output: resp.Text()}, nil + }, + ) + + // Setup Gin router + r := gin.Default() + + r.POST("/simpleFlow", func(c *gin.Context) { + var input FlowInput + if err := c.ShouldBindJSON(&input); err != nil { + c.JSON(400, gin.H{"error": err.Error()}) + return + } + + result, err := simpleFlow.Run(ctx, input) + if err != nil { + c.JSON(500, gin.H{"error": err.Error()}) + return + } + + c.JSON(200, result) + }) + + r.Run(":8080") + } + ``` + + + For Python applications, use Flask or FastAPI. Here's a Flask equivalent: + + ```python + from flask import Flask, request, jsonify + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai + + ai = Genkit( + plugins=[GoogleGenai()], + ) + + # Define flow + @ai.define_flow( + input_schema={"input": str}, + output_schema={"output": str} + ) + async def simple_flow(input_data): + response = await ai.generate(input_data["input"]) + return {"output": response.text} + + # Create Flask app + app = Flask(__name__) + + @app.route('/simpleFlow', methods=['POST']) + async def handle_simple_flow(): + try: + input_data = request.get_json() + result = await simple_flow(input_data) + return jsonify(result) + except Exception as e: + return jsonify({"error": str(e)}), 500 + + if __name__ == '__main__': + app.run(host='0.0.0.0', port=8080) + ``` + + + +## Client Integration + + + + Access your Express-hosted flows from client applications: + + ```ts + import { runFlow, streamFlow } from 'genkit/beta/client'; + + // Basic flow execution + const result = await runFlow({ + url: 'http://localhost:8080/simpleFlow', + input: { input: 'Tell me a joke about programming' }, + }); + console.log(result); // { output: "Why do programmers prefer dark mode?..." 

  // Streaming flow execution
  const streamResult = streamFlow({
    url: 'http://localhost:8080/simpleFlow',
    input: { input: 'Write a story about AI' },
  });

  for await (const chunk of streamResult.stream) {
    console.log('Chunk:', chunk);
  }

  const finalResult = await streamResult.output;
  console.log('Final result:', finalResult);
  ```

  ### Frontend Integration

  ```tsx
  // React component example
  import React, { useState } from 'react';
  import { runFlow } from 'genkit/beta/client';

  function AIChat() {
    const [input, setInput] = useState('');
    const [output, setOutput] = useState('');
    const [loading, setLoading] = useState(false);

    const handleSubmit = async (e: React.FormEvent) => {
      e.preventDefault();
      setLoading(true);

      try {
        const result = await runFlow({
          url: '/api/simpleFlow',
          input: { input },
        });
        setOutput(result.output);
      } catch (error) {
        console.error('Error:', error);
      } finally {
        setLoading(false);
      }
    };

    return (
      <form onSubmit={handleSubmit}>
        <input
          type="text"
          value={input}
          onChange={(e) => setInput(e.target.value)}
          placeholder="Ask me anything..."
        />
        <button type="submit" disabled={loading}>
          {loading ? 'Thinking...' : 'Ask AI'}
        </button>
        {output && <p>{output}</p>}
      </form>
    );
  }
  ```
+ + Access your Go HTTP endpoints from client applications: + + ```go + package main + + import ( + "bytes" + "encoding/json" + "fmt" + "net/http" + ) + + type FlowRequest struct { + Input string `json:"input"` + } + + type FlowResponse struct { + Output string `json:"output"` + } + + func callFlow(input string) (*FlowResponse, error) { + reqBody := FlowRequest{Input: input} + jsonData, err := json.Marshal(reqBody) + if err != nil { + return nil, err + } + + resp, err := http.Post( + "http://localhost:8080/simpleFlow", + "application/json", + bytes.NewBuffer(jsonData), + ) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + var result FlowResponse + err = json.NewDecoder(resp.Body).Decode(&result) + if err != nil { + return nil, err + } + + return &result, nil + } + + func main() { + result, err := callFlow("Tell me a joke about programming") + if err != nil { + fmt.Printf("Error: %v\n", err) + return + } + fmt.Printf("Response: %s\n", result.Output) + } + ``` + + + Access your Python Flask endpoints from client applications: + + ```python + import requests + import asyncio + import aiohttp + + # Synchronous client + def call_flow(input_text): + response = requests.post( + 'http://localhost:8080/simpleFlow', + json={'input': input_text} + ) + response.raise_for_status() + return response.json() + + # Asynchronous client + async def call_flow_async(input_text): + async with aiohttp.ClientSession() as session: + async with session.post( + 'http://localhost:8080/simpleFlow', + json={'input': input_text} + ) as response: + response.raise_for_status() + return await response.json() + + # Usage + result = call_flow("Tell me a joke about programming") + print(result['output']) + + # Async usage + async def main(): + result = await call_flow_async("Write a story about AI") + print(result['output']) + + asyncio.run(main()) + ``` + +
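
Whichever client you use, calls into model-backed endpoints can take a while, so it's worth guarding them with a timeout. A minimal sketch around the JavaScript client (the `withTimeout` helper is illustrative, not part of Genkit):

```ts
import { runFlow } from 'genkit/beta/client';

// Illustrative helper: rejects if the flow call takes longer than `timeoutMs`.
function withTimeout<T>(promise: Promise<T>, timeoutMs = 30_000): Promise<T> {
  const timeout = new Promise<never>((_, reject) =>
    setTimeout(() => reject(new Error(`Flow call timed out after ${timeoutMs} ms`)), timeoutMs),
  );
  return Promise.race([promise, timeout]);
}

const result = await withTimeout(
  runFlow({
    url: 'http://localhost:8080/simpleFlow',
    input: { input: 'Tell me a joke about programming' },
  }),
);
console.log(result.output);
```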
+ +## Authentication and Security + + + + Implement authentication for your Express endpoints: + + ### API Key Authentication + + ```ts + import { apiKey } from 'genkit/context'; + import { startFlowServer, withContextProvider } from '@genkit-ai/express'; + + const securedFlow = ai.defineFlow( + { + name: 'securedFlow', + inputSchema: z.object({ sensitiveData: z.string() }), + outputSchema: z.object({ output: z.string() }), + }, + async ({ sensitiveData }, { context }) => { + // Flow is automatically secured by API key check + return { output: 'This is protected content' }; + } + ); + + // Secure with API key + startFlowServer({ + flows: [withContextProvider(securedFlow, apiKey(process.env.MY_API_KEY))], + port: 8080, + }); + ``` + + ### Custom Authentication + + ```ts + import { ContextProvider, RequestData, UserFacingError } from 'genkit/context'; + + interface AuthContext { + auth?: { + user: string; + role: string; + }; + } + + const customAuth: ContextProvider = async (req: RequestData) => { + const token = req.headers['authorization']?.replace('Bearer ', ''); + + if (!token) { + throw new UserFacingError('UNAUTHENTICATED', 'Missing authorization token'); + } + + // Verify token (implement your own logic) + const user = await verifyJWT(token); + + return { + auth: { + user: user.id, + role: user.role, + }, + }; + }; + + const protectedFlow = ai.defineFlow( + { + name: 'protectedFlow', + inputSchema: z.object({ input: z.string() }), + outputSchema: z.object({ output: z.string() }), + }, + async ({ input }, { context }) => { + if (!context.auth || context.auth.role !== 'admin') { + throw new Error('Admin access required'); + } + + return { output: `Hello ${context.auth.user}, you said: ${input}` }; + } + ); + + startFlowServer({ + flows: [withContextProvider(protectedFlow, customAuth)], + }); + ``` + + ### Express Middleware Integration + + ```ts + import express from 'express'; + import jwt from 'jsonwebtoken'; + + const app = express(); + app.use(express.json()); + + // Custom auth middleware + const authMiddleware = (req: express.Request, res: express.Response, next: express.NextFunction) => { + const token = req.headers.authorization?.replace('Bearer ', ''); + + if (!token) { + return res.status(401).json({ error: 'Missing token' }); + } + + try { + const decoded = jwt.verify(token, process.env.JWT_SECRET!); + req.user = decoded; + next(); + } catch (error) { + return res.status(401).json({ error: 'Invalid token' }); + } + }; + + // Protected endpoint + app.post('/protectedFlow', authMiddleware, expressHandler(protectedFlow)); + ``` + + + Implement authentication in Go applications: + + ```go + import ( + "net/http" + "strings" + "github.com/gin-gonic/gin" + "github.com/golang-jwt/jwt/v4" + ) + + func authMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + authHeader := c.GetHeader("Authorization") + if authHeader == "" { + c.JSON(401, gin.H{"error": "Missing authorization header"}) + c.Abort() + return + } + + tokenString := strings.Replace(authHeader, "Bearer ", "", 1) + + token, err := jwt.Parse(tokenString, func(token *jwt.Token) (interface{}, error) { + return []byte(os.Getenv("JWT_SECRET")), nil + }) + + if err != nil || !token.Valid { + c.JSON(401, gin.H{"error": "Invalid token"}) + c.Abort() + return + } + + if claims, ok := token.Claims.(jwt.MapClaims); ok { + c.Set("user", claims) + } + + c.Next() + } + } + + func main() { + r := gin.Default() + + // Protected route + r.POST("/protectedFlow", authMiddleware(), func(c *gin.Context) { + user, _ := c.Get("user") 
+ // Handle protected flow logic + c.JSON(200, gin.H{"message": "Protected content", "user": user}) + }) + + r.Run(":8080") + } + ``` + + + Implement authentication in Flask applications: + + ```python + from flask import Flask, request, jsonify + from functools import wraps + import jwt + import os + + app = Flask(__name__) + + def auth_required(f): + @wraps(f) + def decorated_function(*args, **kwargs): + token = request.headers.get('Authorization') + if not token: + return jsonify({'error': 'Missing authorization header'}), 401 + + try: + token = token.replace('Bearer ', '') + payload = jwt.decode(token, os.getenv('JWT_SECRET'), algorithms=['HS256']) + request.user = payload + except jwt.InvalidTokenError: + return jsonify({'error': 'Invalid token'}), 401 + + return f(*args, **kwargs) + return decorated_function + + @app.route('/protectedFlow', methods=['POST']) + @auth_required + async def protected_flow(): + user = request.user + input_data = request.get_json() + + # Handle protected flow logic + result = await handle_protected_flow(input_data, user) + return jsonify(result) + ``` + + + +## Advanced Features + +### Multiple Flows and Server Configuration + + + + Use `startFlowServer` for multiple flows with advanced configuration: + + ```ts + import { startFlowServer } from '@genkit-ai/express'; + + const chatFlow = ai.defineFlow( + { + name: 'chatFlow', + inputSchema: z.object({ message: z.string() }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ message }) => { + const { text } = await ai.generate({ + prompt: `Respond to this message: ${message}`, + }); + return { response: text }; + } + ); + + const summaryFlow = ai.defineFlow( + { + name: 'summaryFlow', + inputSchema: z.object({ text: z.string() }), + outputSchema: z.object({ summary: z.string() }), + }, + async ({ text }) => { + const { text: summary } = await ai.generate({ + prompt: `Summarize this text: ${text}`, + }); + return { summary }; + } + ); + + startFlowServer({ + flows: [chatFlow, summaryFlow], + port: 4567, + cors: { + origin: ['http://localhost:3000', 'https://myapp.com'], + credentials: true, + }, + pathPrefix: '/api/v1', + jsonParserOptions: { + limit: '10mb', + }, + }); + ``` + + + Configure multiple endpoints with Gin: + + ```go + func setupRoutes() *gin.Engine { + r := gin.Default() + + // CORS middleware + r.Use(func(c *gin.Context) { + c.Header("Access-Control-Allow-Origin", "*") + c.Header("Access-Control-Allow-Methods", "POST, GET, OPTIONS") + c.Header("Access-Control-Allow-Headers", "Content-Type, Authorization") + + if c.Request.Method == "OPTIONS" { + c.AbortWithStatus(204) + return + } + + c.Next() + }) + + // API group + api := r.Group("/api/v1") + { + api.POST("/chat", handleChatFlow) + api.POST("/summary", handleSummaryFlow) + } + + return r + } + + func main() { + r := setupRoutes() + r.Run(":4567") + } + ``` + + + Configure multiple endpoints with Flask: + + ```python + from flask import Flask + from flask_cors import CORS + + app = Flask(__name__) + CORS(app, origins=['http://localhost:3000', 'https://myapp.com']) + + @app.route('/api/v1/chat', methods=['POST']) + async def chat_flow(): + input_data = request.get_json() + result = await handle_chat_flow(input_data) + return jsonify(result) + + @app.route('/api/v1/summary', methods=['POST']) + async def summary_flow(): + input_data = request.get_json() + result = await handle_summary_flow(input_data) + return jsonify(result) + + if __name__ == '__main__': + app.run(host='0.0.0.0', port=4567) + ``` + + + +### Error Handling and 
Validation + + + + Implement comprehensive error handling: + + ```ts + import { UserFacingError } from 'genkit'; + + const robustFlow = ai.defineFlow( + { + name: 'robustFlow', + inputSchema: z.object({ + text: z.string().min(1).max(1000), + options: z.object({ + temperature: z.number().min(0).max(2).optional(), + }).optional(), + }), + outputSchema: z.object({ result: z.string() }), + }, + async ({ text, options }, { context }) => { + try { + const { text: result } = await ai.generate({ + prompt: text, + config: { + temperature: options?.temperature ?? 0.7, + }, + }); + + return { result }; + } catch (error) { + if (error.message.includes('rate limit')) { + throw new UserFacingError('RESOURCE_EXHAUSTED', 'Rate limit exceeded. Please try again later.'); + } + + throw new UserFacingError('INTERNAL', 'An unexpected error occurred.'); + } + } + ); + + // Custom error handling middleware + app.use((error: any, req: express.Request, res: express.Response, next: express.NextFunction) => { + if (error instanceof UserFacingError) { + return res.status(400).json({ + error: error.message, + code: error.code, + }); + } + + console.error('Unexpected error:', error); + res.status(500).json({ + error: 'Internal server error', + }); + }); + ``` + + + Implement error handling in Go: + + ```go + type ErrorResponse struct { + Error string `json:"error"` + Code string `json:"code,omitempty"` + } + + func handleFlowWithValidation(c *gin.Context) { + var input struct { + Text string `json:"text" binding:"required,min=1,max=1000"` + Options struct { + Temperature *float64 `json:"temperature,omitempty"` + } `json:"options,omitempty"` + } + + if err := c.ShouldBindJSON(&input); err != nil { + c.JSON(400, ErrorResponse{ + Error: "Invalid input: " + err.Error(), + Code: "INVALID_ARGUMENT", + }) + return + } + + result, err := processFlow(input.Text, input.Options.Temperature) + if err != nil { + if strings.Contains(err.Error(), "rate limit") { + c.JSON(429, ErrorResponse{ + Error: "Rate limit exceeded. Please try again later.", + Code: "RESOURCE_EXHAUSTED", + }) + return + } + + c.JSON(500, ErrorResponse{ + Error: "Internal server error", + Code: "INTERNAL", + }) + return + } + + c.JSON(200, gin.H{"result": result}) + } + ``` + + + Implement error handling in Flask: + + ```python + from flask import Flask, request, jsonify + from marshmallow import Schema, fields, ValidationError + + class FlowInputSchema(Schema): + text = fields.Str(required=True, validate=lambda x: 1 <= len(x) <= 1000) + options = fields.Dict(missing={}) + + schema = FlowInputSchema() + + @app.route('/robustFlow', methods=['POST']) + async def robust_flow(): + try: + # Validate input + input_data = schema.load(request.get_json()) + + # Process flow + result = await process_flow( + input_data['text'], + input_data['options'].get('temperature', 0.7) + ) + + return jsonify({'result': result}) + + except ValidationError as err: + return jsonify({ + 'error': 'Invalid input', + 'details': err.messages, + 'code': 'INVALID_ARGUMENT' + }), 400 + + except Exception as err: + if 'rate limit' in str(err): + return jsonify({ + 'error': 'Rate limit exceeded. 
Please try again later.', + 'code': 'RESOURCE_EXHAUSTED' + }), 429 + + return jsonify({ + 'error': 'Internal server error', + 'code': 'INTERNAL' + }), 500 + ``` + + + +## Deployment Considerations + +### Production Configuration + + + + Production-ready Express setup: + + ```ts + import helmet from 'helmet'; + import compression from 'compression'; + import rateLimit from 'express-rate-limit'; + + const app = express(); + + // Security middleware + app.use(helmet()); + app.use(compression()); + + // Rate limiting + const limiter = rateLimit({ + windowMs: 15 * 60 * 1000, // 15 minutes + max: 100, // limit each IP to 100 requests per windowMs + message: 'Too many requests from this IP', + }); + app.use('/api/', limiter); + + // Body parsing with limits + app.use(express.json({ limit: '10mb' })); + + // Health check endpoint + app.get('/health', (req, res) => { + res.json({ status: 'healthy', timestamp: new Date().toISOString() }); + }); + + // Start server + const port = process.env.PORT || 8080; + app.listen(port, () => { + console.log(`Server running on port ${port}`); + }); + ``` + + + Production-ready Go setup: + + ```go + import ( + "time" + "github.com/gin-contrib/cors" + "github.com/gin-contrib/gzip" + "golang.org/x/time/rate" + ) + + func setupProductionServer() *gin.Engine { + gin.SetMode(gin.ReleaseMode) + r := gin.New() + + // Middleware + r.Use(gin.Logger()) + r.Use(gin.Recovery()) + r.Use(gzip.Gzip(gzip.DefaultCompression)) + + // CORS + r.Use(cors.New(cors.Config{ + AllowOrigins: []string{"https://myapp.com"}, + AllowMethods: []string{"POST", "GET"}, + AllowHeaders: []string{"Content-Type", "Authorization"}, + ExposeHeaders: []string{"Content-Length"}, + AllowCredentials: true, + MaxAge: 12 * time.Hour, + })) + + // Rate limiting middleware + limiter := rate.NewLimiter(rate.Every(time.Minute), 60) + r.Use(func(c *gin.Context) { + if !limiter.Allow() { + c.JSON(429, gin.H{"error": "Rate limit exceeded"}) + c.Abort() + return + } + c.Next() + }) + + // Health check + r.GET("/health", func(c *gin.Context) { + c.JSON(200, gin.H{ + "status": "healthy", + "timestamp": time.Now().Format(time.RFC3339), + }) + }) + + return r + } + ``` + + + Production-ready Flask setup: + + ```python + from flask import Flask + from flask_cors import CORS + from flask_limiter import Limiter + from flask_limiter.util import get_remote_address + import os + + app = Flask(__name__) + + # CORS + CORS(app, origins=['https://myapp.com']) + + # Rate limiting + limiter = Limiter( + app, + key_func=get_remote_address, + default_limits=["100 per hour"] + ) + + @app.route('/health') + def health_check(): + return jsonify({ + 'status': 'healthy', + 'timestamp': datetime.utcnow().isoformat() + }) + + @app.errorhandler(429) + def ratelimit_handler(e): + return jsonify({'error': 'Rate limit exceeded'}), 429 + + if __name__ == '__main__': + port = int(os.environ.get('PORT', 8080)) + app.run(host='0.0.0.0', port=port, debug=False) + ``` + + + +## Next Steps + +- Learn about [creating flows](/unified-docs/creating-flows) to build more complex AI workflows +- Explore [authentication patterns](/unified-docs/auth) for securing your applications +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other framework integrations: + - [Next.js](/unified-docs/frameworks/nextjs) for React applications + - [Gin](/unified-docs/frameworks/gin) for Go applications + - [Flask](/unified-docs/frameworks/flask) for Python applications diff --git 
a/src/content/docs/unified-docs/frameworks/nextjs.mdx b/src/content/docs/unified-docs/frameworks/nextjs.mdx new file mode 100644 index 00000000..23f1e2b0 --- /dev/null +++ b/src/content/docs/unified-docs/frameworks/nextjs.mdx @@ -0,0 +1,1143 @@ +--- +title: Next.js Integration +description: Learn how to integrate Genkit with Next.js applications across JavaScript, Go, and Python, including API routes, client-side calls, streaming, and deployment strategies. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; +import { Badge } from '@astrojs/starlight/components'; + + + +The Next.js integration provides a seamless way to build full-stack AI applications with Genkit, offering both server-side API routes and client-side streaming capabilities for modern React applications. + +:::note[Framework Availability] +Next.js integration is primarily available for JavaScript/Node.js. For other languages, see equivalent frameworks: +- **Go**: [Gin](/unified-docs/frameworks/gin) with React frontend +- **Python**: [FastAPI](/unified-docs/frameworks/fastapi) with React frontend +::: + +## Installation and Setup + + + + ### Create a Next.js Project + + If you don't have an existing Next.js project: + + ```bash + npx create-next-app@latest my-genkit-app --src-dir --typescript + cd my-genkit-app + ``` + + ### Install Dependencies + + ```bash + # Core Genkit and Next.js plugin + npm install genkit @genkit-ai/next + + # Choose your AI provider + npm install @genkit-ai/googleai + # or npm install @genkit-ai/vertexai + # or npm install @genkit-ai/compat-oai + + # Development tools (optional) + npm install -g genkit-cli + npm install --save-dev tsx + ``` + + ### Project Structure + + ``` + my-genkit-app/ + ├── src/ + │ ├── app/ + │ │ ├── api/ + │ │ │ └── flows/ + │ │ │ └── route.ts + │ │ └── page.tsx + │ └── genkit/ + │ └── flows.ts + ├── package.json + └── next.config.js + ``` + + + For Go applications, create a separate backend API and React frontend: + + ### Backend Setup (Go) + + ```bash + mkdir my-genkit-app + cd my-genkit-app + mkdir backend frontend + ``` + + ```go + // backend/main.go + package main + + import ( + "context" + "github.com/gin-gonic/gin" + "github.com/gin-contrib/cors" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + ) + if err != nil { + log.Fatal(err) + } + + r := gin.Default() + + // CORS for React frontend + r.Use(cors.New(cors.Config{ + AllowOrigins: []string{"http://localhost:3000"}, + AllowMethods: []string{"POST", "GET", "OPTIONS"}, + AllowHeaders: []string{"Content-Type", "Authorization"}, + AllowCredentials: true, + })) + + // API routes + r.POST("/api/flows/menuSuggestion", handleMenuSuggestion) + + r.Run(":8080") + } + ``` + + ### Frontend Setup (React) + + ```bash + cd frontend + npx create-next-app@latest . 
--typescript + npm install + ``` + + + For Python applications, create a FastAPI backend with React frontend: + + ### Backend Setup (Python) + + ```bash + mkdir my-genkit-app + cd my-genkit-app + mkdir backend frontend + + cd backend + pip install fastapi uvicorn genkit-plugin-google-genai + ``` + + ```python + # backend/main.py + from fastapi import FastAPI + from fastapi.middleware.cors import CORSMiddleware + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai + + ai = Genkit(plugins=[GoogleGenai()]) + + app = FastAPI() + + # CORS for React frontend + app.add_middleware( + CORSMiddleware, + allow_origins=["http://localhost:3000"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + + @app.post("/api/flows/menuSuggestion") + async def menu_suggestion(request: dict): + # Handle flow logic + pass + + if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8080) + ``` + + ### Frontend Setup (React) + + ```bash + cd ../frontend + npx create-next-app@latest . --typescript + npm install + ``` + + + +## Define Genkit Flows + + + + Create your Genkit flows in `src/genkit/flows.ts`: + + ```ts + import { googleAI } from '@genkit-ai/googleai'; + import { genkit, z } from 'genkit'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + export const menuSuggestionFlow = ai.defineFlow( + { + name: 'menuSuggestionFlow', + inputSchema: z.object({ theme: z.string() }), + outputSchema: z.object({ menuItem: z.string() }), + streamSchema: z.string(), + }, + async ({ theme }, { sendChunk }) => { + const { stream, response } = ai.generateStream({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Invent a menu item for a ${theme} themed restaurant.`, + }); + + for await (const chunk of stream) { + sendChunk(chunk.text); + } + + const { text } = await response; + return { menuItem: text }; + } + ); + + export const chatFlow = ai.defineFlow( + { + name: 'chatFlow', + inputSchema: z.object({ + message: z.string(), + history: z.array(z.object({ + role: z.enum(['user', 'assistant']), + content: z.string(), + })).optional(), + }), + outputSchema: z.object({ response: z.string() }), + streamSchema: z.string(), + }, + async ({ message, history = [] }, { sendChunk }) => { + const conversationContext = history + .map(msg => `${msg.role}: ${msg.content}`) + .join('\n'); + + const { stream, response } = ai.generateStream({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `${conversationContext}\nuser: ${message}\nassistant:`, + }); + + for await (const chunk of stream) { + sendChunk(chunk.text); + } + + const { text } = await response; + return { response: text }; + } + ); + ``` + + + Define flows in your Go backend: + + ```go + // backend/flows.go + package main + + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/ai" + ) + + type MenuSuggestionInput struct { + Theme string `json:"theme"` + } + + type MenuSuggestionOutput struct { + MenuItem string `json:"menuItem"` + } + + func handleMenuSuggestion(c *gin.Context) { + var input MenuSuggestionInput + if err := c.ShouldBindJSON(&input); err != nil { + c.JSON(400, gin.H{"error": err.Error()}) + return + } + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(fmt.Sprintf("Invent a menu item for a %s themed restaurant.", input.Theme)), + ) + if err != nil { + c.JSON(500, gin.H{"error": err.Error()}) + return + } + + c.JSON(200, MenuSuggestionOutput{ + MenuItem: resp.Text(), + }) + } + + type ChatInput struct { + Message string 
`json:"message"` + History []struct { + Role string `json:"role"` + Content string `json:"content"` + } `json:"history"` + } + + type ChatOutput struct { + Response string `json:"response"` + } + + func handleChat(c *gin.Context) { + var input ChatInput + if err := c.ShouldBindJSON(&input); err != nil { + c.JSON(400, gin.H{"error": err.Error()}) + return + } + + // Build conversation context + var context strings.Builder + for _, msg := range input.History { + context.WriteString(fmt.Sprintf("%s: %s\n", msg.Role, msg.Content)) + } + context.WriteString(fmt.Sprintf("user: %s\nassistant:", input.Message)) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(context.String()), + ) + if err != nil { + c.JSON(500, gin.H{"error": err.Error()}) + return + } + + c.JSON(200, ChatOutput{ + Response: resp.Text(), + }) + } + ``` + + + Define flows in your Python backend: + + ```python + # backend/flows.py + from pydantic import BaseModel + from typing import List, Optional + + class MenuSuggestionInput(BaseModel): + theme: str + + class MenuSuggestionOutput(BaseModel): + menuItem: str + + class ChatMessage(BaseModel): + role: str + content: str + + class ChatInput(BaseModel): + message: str + history: Optional[List[ChatMessage]] = [] + + class ChatOutput(BaseModel): + response: str + + @ai.define_flow( + input_schema=MenuSuggestionInput, + output_schema=MenuSuggestionOutput + ) + async def menu_suggestion_flow(input_data: MenuSuggestionInput): + response = await ai.generate( + f"Invent a menu item for a {input_data.theme} themed restaurant." + ) + return MenuSuggestionOutput(menuItem=response.text) + + @ai.define_flow( + input_schema=ChatInput, + output_schema=ChatOutput + ) + async def chat_flow(input_data: ChatInput): + # Build conversation context + context = "\n".join([ + f"{msg.role}: {msg.content}" + for msg in input_data.history + ]) + context += f"\nuser: {input_data.message}\nassistant:" + + response = await ai.generate(context) + return ChatOutput(response=response.text) + ``` + + + +## Create API Routes + + + + Create API routes using the Genkit Next.js plugin: + + ### Individual Route Files + + Create `src/app/api/menuSuggestion/route.ts`: + + ```ts + import { menuSuggestionFlow } from '@/genkit/flows'; + import { appRoute } from '@genkit-ai/next'; + + export const POST = appRoute(menuSuggestionFlow); + ``` + + Create `src/app/api/chat/route.ts`: + + ```ts + import { chatFlow } from '@/genkit/flows'; + import { appRoute } from '@genkit-ai/next'; + + export const POST = appRoute(chatFlow); + ``` + + ### Unified Route Handler + + Alternatively, create `src/app/api/flows/[flowName]/route.ts`: + + ```ts + import { menuSuggestionFlow, chatFlow } from '@/genkit/flows'; + import { appRoute } from '@genkit-ai/next'; + + const flows = { + menuSuggestion: menuSuggestionFlow, + chat: chatFlow, + }; + + export async function POST( + request: Request, + { params }: { params: { flowName: string } } + ) { + const flow = flows[params.flowName as keyof typeof flows]; + + if (!flow) { + return new Response('Flow not found', { status: 404 }); + } + + return appRoute(flow)(request); + } + ``` + + + Set up API routes in your Gin router: + + ```go + func setupRoutes(g *genkit.Genkit) *gin.Engine { + r := gin.Default() + + // CORS middleware + r.Use(cors.New(cors.Config{ + AllowOrigins: []string{"http://localhost:3000"}, + AllowMethods: []string{"POST", "GET", "OPTIONS"}, + AllowHeaders: []string{"Content-Type", "Authorization"}, + AllowCredentials: true, + })) + + // API routes + api := r.Group("/api") + { 
+ api.POST("/menuSuggestion", handleMenuSuggestion) + api.POST("/chat", handleChat) + } + + // Health check + r.GET("/health", func(c *gin.Context) { + c.JSON(200, gin.H{"status": "healthy"}) + }) + + return r + } + ``` + + + Set up API routes in FastAPI: + + ```python + # backend/main.py + @app.post("/api/menuSuggestion") + async def menu_suggestion_endpoint(input_data: MenuSuggestionInput): + result = await menu_suggestion_flow(input_data) + return result + + @app.post("/api/chat") + async def chat_endpoint(input_data: ChatInput): + result = await chat_flow(input_data) + return result + + @app.get("/health") + async def health_check(): + return {"status": "healthy"} + ``` + + + +## Frontend Implementation + + + + Create your React components with Genkit integration: + + ### Basic Usage + + ```tsx + // src/app/page.tsx + 'use client'; + + import { useState } from 'react'; + import { runFlow, streamFlow } from '@genkit-ai/next/client'; + import { menuSuggestionFlow } from '@/genkit/flows'; + + export default function Home() { + const [menuItem, setMenuItem] = useState(''); + const [isLoading, setIsLoading] = useState(false); + const [streamedText, setStreamedText] = useState(''); + + async function getMenuItem(formData: FormData) { + const theme = formData.get('theme')?.toString() ?? ''; + setIsLoading(true); + + try { + const result = await runFlow({ + url: '/api/menuSuggestion', + input: { theme }, + }); + + setMenuItem(result.menuItem); + } catch (error) { + console.error('Error generating menu item:', error); + } finally { + setIsLoading(false); + } + } + + async function streamMenuItem(formData: FormData) { + const theme = formData.get('theme')?.toString() ?? ''; + setIsLoading(true); + setStreamedText(''); + + try { + const result = streamFlow({ + url: '/api/menuSuggestion', + input: { theme }, + }); + + for await (const chunk of result.stream) { + setStreamedText((prev) => prev + chunk); + } + + const finalOutput = await result.output; + setMenuItem(finalOutput.menuItem); + } catch (error) { + console.error('Error streaming menu item:', error); + } finally { + setIsLoading(false); + } + } + + return ( +
      <main>
        <h1>AI Menu Generator</h1>

        <form>
          <input
            type="text"
            name="theme"
            placeholder="Enter a restaurant theme..."
            required
          />
          <div>
            <button formAction={getMenuItem} disabled={isLoading}>
              {isLoading ? 'Generating...' : 'Generate'}
            </button>
            <button formAction={streamMenuItem} disabled={isLoading}>
              {isLoading ? 'Streaming...' : 'Stream'}
            </button>
          </div>
        </form>

        {streamedText && (
          <section>
            <h3>Streaming Output:</h3>
            <p>{streamedText}</p>
          </section>
        )}

        {menuItem && (
          <section>
            <h3>Final Output:</h3>
            <p>{menuItem}</p>
          </section>
        )}
      </main>
    );
  }
  ```

  ### Chat Interface

  ```tsx
  // src/components/ChatInterface.tsx
  'use client';

  import { useState } from 'react';
  import { streamFlow } from '@genkit-ai/next/client';
  import { chatFlow } from '@/genkit/flows';

  interface Message {
    role: 'user' | 'assistant';
    content: string;
  }

  export default function ChatInterface() {
    const [messages, setMessages] = useState<Message[]>([]);
    const [input, setInput] = useState('');
    const [isLoading, setIsLoading] = useState(false);

    async function sendMessage() {
      if (!input.trim()) return;

      const userMessage: Message = { role: 'user', content: input };
      setMessages(prev => [...prev, userMessage]);
      setInput('');
      setIsLoading(true);

      try {
        const result = streamFlow({
          url: '/api/chat',
          input: {
            message: input,
            history: messages,
          },
        });

        let assistantMessage = '';
        setMessages(prev => [...prev, { role: 'assistant', content: '' }]);

        for await (const chunk of result.stream) {
          assistantMessage += chunk;
          setMessages(prev => [
            ...prev.slice(0, -1),
            { role: 'assistant', content: assistantMessage }
          ]);
        }

        const finalOutput = await result.output;
        setMessages(prev => [
          ...prev.slice(0, -1),
          { role: 'assistant', content: finalOutput.response }
        ]);
      } catch (error) {
        console.error('Error sending message:', error);
      } finally {
        setIsLoading(false);
      }
    }

    return (
      <div>
        <div>
          {messages.map((message, index) => (
            <div key={index} className={message.role}>
              <div>
                {message.content}
              </div>
            </div>
          ))}
        </div>

        <div>
          <input
            type="text"
            value={input}
            onChange={(e) => setInput(e.target.value)}
            onKeyPress={(e) => e.key === 'Enter' && sendMessage()}
            className="flex-1 border rounded-lg px-3 py-2"
            placeholder="Type your message..."
            disabled={isLoading}
          />
          <button onClick={sendMessage} disabled={isLoading}>
            Send
          </button>
        </div>
      </div>
    );
  }
  ```
+ + Create React components that call your Go backend: + + ```tsx + // frontend/src/app/page.tsx + 'use client'; + + import { useState } from 'react'; + + interface MenuSuggestionResponse { + menuItem: string; + } + + export default function Home() { + const [menuItem, setMenuItem] = useState(''); + const [isLoading, setIsLoading] = useState(false); + + async function getMenuItem(formData: FormData) { + const theme = formData.get('theme')?.toString() ?? ''; + setIsLoading(true); + + try { + const response = await fetch('http://localhost:8080/api/menuSuggestion', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ theme }), + }); + + if (!response.ok) { + throw new Error('Failed to generate menu item'); + } + + const result: MenuSuggestionResponse = await response.json(); + setMenuItem(result.menuItem); + } catch (error) { + console.error('Error generating menu item:', error); + } finally { + setIsLoading(false); + } + } + + return ( +
      <main>
        <h1>AI Menu Generator</h1>

        <form action={getMenuItem}>
          <input
            type="text"
            name="theme"
            placeholder="Enter a restaurant theme..."
            required
          />
          <button type="submit" disabled={isLoading}>
            {isLoading ? 'Generating...' : 'Generate'}
          </button>
        </form>

        {menuItem && (
          <section>
            <h3>Generated Menu Item:</h3>
            <p>{menuItem}</p>
          </section>
        )}
      </main>
    );
  }
  ```
+ + Create React components that call your Python backend: + + ```tsx + // frontend/src/app/page.tsx + 'use client'; + + import { useState } from 'react'; + + interface MenuSuggestionResponse { + menuItem: string; + } + + export default function Home() { + const [menuItem, setMenuItem] = useState(''); + const [isLoading, setIsLoading] = useState(false); + + async function getMenuItem(formData: FormData) { + const theme = formData.get('theme')?.toString() ?? ''; + setIsLoading(true); + + try { + const response = await fetch('http://localhost:8080/api/menuSuggestion', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ theme }), + }); + + if (!response.ok) { + throw new Error('Failed to generate menu item'); + } + + const result: MenuSuggestionResponse = await response.json(); + setMenuItem(result.menuItem); + } catch (error) { + console.error('Error generating menu item:', error); + } finally { + setIsLoading(false); + } + } + + return ( +
      <main>
        <h1>AI Menu Generator</h1>

        <form action={getMenuItem}>
          <input
            type="text"
            name="theme"
            placeholder="Enter a restaurant theme..."
            required
          />
          <button type="submit" disabled={isLoading}>
            {isLoading ? 'Generating...' : 'Generate'}
          </button>
        </form>

        {menuItem && (
          <section>
            <h3>Generated Menu Item:</h3>
            <p>{menuItem}</p>
          </section>
        )}
      </main>
    );
  }
  ```
+
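
If several components need the same streaming pattern, you can factor the chunk-accumulation loop into a small reusable hook. A sketch built on the `streamFlow` client shown above; the hook name and shape are illustrative:

```tsx
'use client';

import { useState } from 'react';
import { streamFlow } from '@genkit-ai/next/client';

// Accumulates streamed chunks into state and resolves with the flow's final output.
export function useStreamingFlow(url: string) {
  const [chunks, setChunks] = useState('');
  const [isStreaming, setIsStreaming] = useState(false);

  async function run(input: unknown) {
    setChunks('');
    setIsStreaming(true);
    try {
      const result = streamFlow({ url, input });
      for await (const chunk of result.stream) {
        setChunks((prev) => prev + chunk);
      }
      return await result.output;
    } finally {
      setIsStreaming(false);
    }
  }

  return { chunks, isStreaming, run };
}
```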
+ +## Authentication and Security + + + + ### API Key Authentication + + ```tsx + // Client-side with headers + const result = await runFlow({ + url: '/api/menuSuggestion', + headers: { + Authorization: 'Bearer your-token-here', + }, + input: { theme }, + }); + ``` + + ### Next.js Middleware + + ```ts + // middleware.ts + import { NextRequest, NextResponse } from 'next/server'; + import { jwtVerify } from 'jose'; + + export async function middleware(request: NextRequest) { + if (request.nextUrl.pathname.startsWith('/api/')) { + const token = request.headers.get('authorization')?.replace('Bearer ', ''); + + if (!token) { + return NextResponse.json({ error: 'Unauthorized' }, { status: 401 }); + } + + try { + await jwtVerify(token, new TextEncoder().encode(process.env.JWT_SECRET!)); + } catch (error) { + return NextResponse.json({ error: 'Invalid token' }, { status: 401 }); + } + } + + return NextResponse.next(); + } + + export const config = { + matcher: '/api/:path*', + }; + ``` + + ### Session-based Authentication + + ```ts + // src/app/api/auth/route.ts + import { NextRequest, NextResponse } from 'next/server'; + import { cookies } from 'next/headers'; + + export async function POST(request: NextRequest) { + const { username, password } = await request.json(); + + // Verify credentials + if (await verifyCredentials(username, password)) { + const sessionToken = generateSessionToken(); + + cookies().set('session', sessionToken, { + httpOnly: true, + secure: process.env.NODE_ENV === 'production', + sameSite: 'strict', + maxAge: 60 * 60 * 24 * 7, // 1 week + }); + + return NextResponse.json({ success: true }); + } + + return NextResponse.json({ error: 'Invalid credentials' }, { status: 401 }); + } + ``` + + + Implement JWT authentication in your Go backend: + + ```go + import ( + "github.com/golang-jwt/jwt/v4" + "github.com/gin-gonic/gin" + ) + + func authMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + authHeader := c.GetHeader("Authorization") + if authHeader == "" { + c.JSON(401, gin.H{"error": "Missing authorization header"}) + c.Abort() + return + } + + tokenString := strings.Replace(authHeader, "Bearer ", "", 1) + + token, err := jwt.Parse(tokenString, func(token *jwt.Token) (interface{}, error) { + return []byte(os.Getenv("JWT_SECRET")), nil + }) + + if err != nil || !token.Valid { + c.JSON(401, gin.H{"error": "Invalid token"}) + c.Abort() + return + } + + c.Next() + } + } + + // Apply to protected routes + api.POST("/menuSuggestion", authMiddleware(), handleMenuSuggestion) + ``` + + + Implement JWT authentication in your FastAPI backend: + + ```python + from fastapi import Depends, HTTPException, status + from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials + import jwt + + security = HTTPBearer() + + def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)): + try: + payload = jwt.decode( + credentials.credentials, + os.getenv("JWT_SECRET"), + algorithms=["HS256"] + ) + return payload + except jwt.InvalidTokenError: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Invalid token" + ) + + @app.post("/api/menuSuggestion") + async def menu_suggestion_endpoint( + input_data: MenuSuggestionInput, + user=Depends(verify_token) + ): + result = await menu_suggestion_flow(input_data) + return result + ``` + + + +## Deployment Considerations + +### Environment Variables + + + + Configure environment variables for production: + + ```bash + # .env.local + GEMINI_API_KEY=your_gemini_api_key + 
JWT_SECRET=your_jwt_secret + NEXTAUTH_SECRET=your_nextauth_secret + NEXTAUTH_URL=https://your-domain.com + ``` + + ### Vercel Deployment + + ```bash + # Install Vercel CLI + npm install -g vercel + + # Deploy + vercel + + # Set environment variables + vercel env add GEMINI_API_KEY + vercel env add JWT_SECRET + ``` + + ### Docker Deployment + + ```dockerfile + # Dockerfile + FROM node:18-alpine + + WORKDIR /app + + COPY package*.json ./ + RUN npm ci --only=production + + COPY . . + RUN npm run build + + EXPOSE 3000 + + CMD ["npm", "start"] + ``` + + + Deploy your Go backend: + + ```dockerfile + # Dockerfile + FROM golang:1.21-alpine AS builder + + WORKDIR /app + COPY go.mod go.sum ./ + RUN go mod download + + COPY . . + RUN go build -o main . + + FROM alpine:latest + RUN apk --no-cache add ca-certificates + WORKDIR /root/ + + COPY --from=builder /app/main . + + EXPOSE 8080 + + CMD ["./main"] + ``` + + ### Cloud Run Deployment + + ```bash + # Build and deploy + gcloud builds submit --tag gcr.io/PROJECT_ID/genkit-backend + gcloud run deploy --image gcr.io/PROJECT_ID/genkit-backend --platform managed + ``` + + + Deploy your Python backend: + + ```dockerfile + # Dockerfile + FROM python:3.11-slim + + WORKDIR /app + + COPY requirements.txt . + RUN pip install --no-cache-dir -r requirements.txt + + COPY . . + + EXPOSE 8080 + + CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"] + ``` + + ### Cloud Run Deployment + + ```bash + # Build and deploy + gcloud builds submit --tag gcr.io/PROJECT_ID/genkit-backend + gcloud run deploy --image gcr.io/PROJECT_ID/genkit-backend --platform managed + ``` + + + +## Next Steps + +- Learn about [creating flows](/unified-docs/creating-flows) to build more complex AI workflows +- Explore [authentication patterns](/unified-docs/auth) for securing your applications +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other framework integrations: + - [Express.js](/unified-docs/frameworks/express) for API-first applications + - [Gin](/unified-docs/frameworks/gin) for Go applications + - [Flask](/unified-docs/frameworks/flask) for Python applications diff --git a/src/content/docs/unified-docs/get-started.mdx b/src/content/docs/unified-docs/get-started.mdx new file mode 100644 index 00000000..59ddc951 --- /dev/null +++ b/src/content/docs/unified-docs/get-started.mdx @@ -0,0 +1,537 @@ +--- +title: Get started with Genkit +description: Learn how to get started with Genkit across JavaScript, Go, and Python, including project setup, installing packages, configuring API keys, creating your first flow, and testing in the Developer UI. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; +import { LinkButton } from '@astrojs/starlight/components'; + +This guide shows you how to get started with Genkit in your preferred language and test it in the Developer UI. + +## Prerequisites + +Before you begin, make sure your environment meets these requirements: + + + + - Node.js v20 or later + - npm + + This guide assumes you're already familiar with building Node.js applications. + + + - Go 1.24 or later ([Download and install](https://go.dev/doc/install)) + + This guide assumes you're already familiar with building Go applications. + + + - Python 3.10 or later ([Download and install](https://www.python.org/downloads/)) + - Node.js 20 or later (for the Genkit CLI and UI) + + :::note[Alpha Release] + The Genkit libraries for Python are currently in **Alpha**. 
You might see API and functional changes as development progresses. We recommend using it only for prototyping and exploration. + ::: + + + +## Set up your project + + + + Create a new Node.js project and configure TypeScript: + + ```sh + mkdir my-genkit-app + cd my-genkit-app + npm init -y + + # Set up your source directory + mkdir src + touch src/index.ts + + # Install and configure TypeScript + npm install -D typescript tsx + npx tsc --init + ``` + + This sets up your project structure and a TypeScript entry point at `src/index.ts`. + + + Initialize a new Go project directory: + + ```bash + mkdir genkit-intro && cd genkit-intro + + go mod init example/genkit-intro + ``` + + Create a `main.go` file for your application entry point. + + + Create a new project directory and set up a virtual environment: + + ```bash + mkdir genkit-intro && cd genkit-intro + ``` + + (Recommended) Create a Python virtual environment: + + ```bash + python3 -m venv . + ``` + + Activate the virtual environment if necessary: + + ```bash + source bin/activate # for bash + ``` + + + +## Install Genkit packages + + + + First, install the Genkit CLI globally. This gives you access to local developer tools, including the Developer UI: + + ```bash + npm install -g genkit-cli + ``` + + Then, add the following packages to your project: + + ```bash + npm install genkit @genkit-ai/googleai + ``` + + - `genkit` provides Genkit core capabilities. + - `@genkit-ai/googleai` provides access to the Google AI Gemini models. + + + Install the Genkit package for Go: + + ```bash + go get github.com/firebase/genkit/go + ``` + + This provides Genkit core capabilities and access to Google AI Gemini models. + + + Install the required Python packages: + + ```bash + pip3 install genkit + pip3 install genkit-plugin-google-genai + ``` + + Or create a `requirements.txt` file: + + ```text title="requirements.txt" + genkit + genkit-plugin-google-genai + ``` + + and run: + + ```bash + pip3 install -r requirements.txt + ``` + + + +## Configure your model API key + +Genkit can work with multiple model providers. This guide uses the **Gemini API**, which offers a generous free tier and doesn't require a credit card to get started. + +To use it, you'll need an API key from Google AI Studio: + + + Get a Gemini API Key + + +Once you have a key, set the `GEMINI_API_KEY` environment variable: + +```sh +export GEMINI_API_KEY= +``` + +:::note +Genkit also supports models from Vertex AI, Anthropic, OpenAI, Cohere, Ollama, and more. See [generating content](/unified-docs/generating-content) for details. +::: + +## Create your first application + + + + A flow is a special Genkit function with built-in observability, type safety, and tooling integration. 
+ + Update `src/index.ts` with the following: + + ```ts + import { googleAI } from '@genkit-ai/googleai'; + import { genkit, z } from 'genkit'; + + // Initialize Genkit with the Google AI plugin + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash', { + temperature: 0.8 + }), + }); + + // Define input schema + const RecipeInputSchema = z.object({ + ingredient: z.string().describe('Main ingredient or cuisine type'), + dietaryRestrictions: z.string().optional().describe('Any dietary restrictions'), + }); + + // Define output schema + const RecipeSchema = z.object({ + title: z.string(), + description: z.string(), + prepTime: z.string(), + cookTime: z.string(), + servings: z.number(), + ingredients: z.array(z.string()), + instructions: z.array(z.string()), + tips: z.array(z.string()).optional(), + }); + + // Define a recipe generator flow + export const recipeGeneratorFlow = ai.defineFlow( + { + name: 'recipeGeneratorFlow', + inputSchema: RecipeInputSchema, + outputSchema: RecipeSchema, + }, + async (input) => { + // Create a prompt based on the input + const prompt = `Create a recipe with the following requirements: + Main ingredient: ${input.ingredient} + Dietary restrictions: ${input.dietaryRestrictions || 'none'}`; + + // Generate structured recipe data using the same schema + const { output } = await ai.generate({ + prompt, + output: { schema: RecipeSchema }, + }); + + if (!output) throw new Error('Failed to generate recipe'); + + return output; + } + ); + + // Run the flow + async function main() { + const recipe = await recipeGeneratorFlow({ + ingredient: 'avocado', + dietaryRestrictions: 'vegetarian' + }); + + console.log(recipe); + } + + main().catch(console.error); + ``` + + This code sample: + + - Defines reusable input and output schemas with [Zod](https://zod.dev/) + - Configures the `gemini-2.5-flash` model with temperature settings + - Defines a Genkit flow to generate a structured recipe based on your input + - Runs the flow with a sample input and prints the result + + ##### Why use flows? + + - Type-safe inputs and outputs + - Integrates with the Developer UI + - Easy deployment as APIs + - Built-in tracing and observability + + + Create a `main.go` file with the following sample code: + + ```go + package main + + import ( + "context" + "log" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + + // Initialize Genkit with the Google AI plugin and Gemini 2.5 Flash. + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatalf("could not initialize Genkit: %v", err) + } + + resp, err := genkit.Generate(ctx, g, ai.WithPrompt("What is the meaning of life?")) + if err != nil { + log.Fatalf("could not generate model response: %v", err) + } + + log.Println(resp.Text()) + } + ``` + + This code sample: + + - Initializes Genkit with the Google AI plugin + - Configures the `gemini-2.5-flash` model as the default + - Makes a simple generation request + - Prints the model's response + + For more advanced examples with flows and structured output, see [creating flows](/unified-docs/creating-flows). 
+ + + Create a `main.py` file: + + ```python title="main.py" + import json + from pydantic import BaseModel, Field + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleAI + + ai = Genkit( + plugins=[GoogleAI()], + model='googleai/gemini-2.5-flash', + ) + + class RpgCharacter(BaseModel): + name: str = Field(description='name of the character') + back_story: str = Field(description='back story') + abilities: list[str] = Field(description='list of abilities (3-4)') + + @ai.flow() + async def generate_character(name: str): + result = await ai.generate( + prompt=f'generate an RPG character named {name}', + output_schema=RpgCharacter, + ) + return result.output + + async def main() -> None: + print(json.dumps(await generate_character('Goblorb'), indent=2)) + + ai.run_main(main()) + ``` + + This code sample: + + - Initializes Genkit with the Google AI plugin + - Defines a structured output schema using Pydantic + - Creates a flow to generate RPG characters + - Runs the flow and prints the structured result + + + +## Run your application + + + + Run your application to see it in action: + + ```bash + npx tsx src/index.ts + ``` + + You should see a structured recipe output in your console. + + + Run the app to see the model response: + + ```bash + go run . + # Example output (may vary): + # There is no single universally agreed-upon meaning of life; it's a deeply + # personal question. Many find meaning through connection, growth, + # contribution, happiness, or discovering their own purpose. + ``` + + + Run your app (Genkit apps are just regular Python applications): + + ```bash + python3 main.py + ``` + + You should see a structured RPG character output in JSON format. + + + +## Test in the Developer UI + +The **Developer UI** is a local tool for testing and inspecting Genkit components, like flows, with a visual interface. + +### Install the Genkit CLI (if needed) + + + + If you followed the installation steps above, you already have the Genkit CLI installed. + + + Install the Genkit CLI using npm: + + ```bash + npm install -g genkit-cli + ``` + + This requires Node.js to be installed on your system. + + + If you don't already have Node 20 or newer on your system, install it now. + + **Recommendation**: The [`nvm`](https://github.com/nvm-sh/nvm) and [`nvm-windows`](https://github.com/coreybutler/nvm-windows) tools are a convenient way to install specific versions of Node. + + To install `nvm`: + + **Linux, macOS, etc.:** + ```bash + curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash + ``` + + **Windows:** + Download and run the installer as described in the [nvm-windows docs](https://github.com/coreybutler/nvm-windows?tab=readme-ov-file#install-nvm-windows). + + Then, to install Node and npm: + ```bash + nvm install 20 + ``` + + Install the Genkit CLI: + ```bash + npm install -g genkit-cli + ``` + + + +### Start the Developer UI + + + + Run the following command from your project root: + + ```bash + genkit start -- npx tsx --watch src/index.ts + ``` + + This starts your app and launches the Developer UI at `http://localhost:4000` by default. + + :::note + The command after `--` should run the file that defines or imports your Genkit components. You can use `tsx`, `node`, or other commands based on your setup. Learn more in [developer tools](/unified-docs/developer-tools). 
+ ::: + + ##### Optional: Add an npm script + + To make starting the Developer UI easier, add the following to your `package.json` scripts: + + ```json + "scripts": { + "genkit:ui": "genkit start -- npx tsx --watch src/index.ts" + } + ``` + + Then run it with: + + ```sh + npm run genkit:ui + ``` + + + Run the following command from your project root: + + ```bash + genkit start -- go run . + ``` + + This starts your app and launches the Developer UI at `http://localhost:4000` by default. + + + To inspect your app with Genkit Dev UI, run: + + ```bash + genkit start -- python3 main.py + ``` + + The command will print the Dev UI URL: + + ``` + Genkit Developer UI: http://localhost:4000 + ``` + + + +### Run and inspect flows + + + + In the Developer UI: + + 1. Select the `recipeGeneratorFlow` from the list of flows + 2. Enter sample input: + ```json + { + "ingredient": "avocado", + "dietaryRestrictions": "vegetarian" + } + ``` + 3. Click **Run** + + You'll see the generated recipe as structured output, along with a visual trace of the AI generation process for debugging and optimization. + + + + + In the Developer UI, you can: + + - Test generation requests with different prompts + - View traces of your application's execution + - Inspect model responses and performance metrics + - Debug any issues with your Genkit integration + + For more advanced flows and structured output, see [creating flows](/unified-docs/creating-flows). + + + In the Developer UI: + + 1. Select the `generate_character` flow from the list of flows + 2. Enter a character name as input (e.g., `"Goblorb"`) + 3. Click **Run** + + You'll see the generated RPG character as structured output, along with execution traces for debugging. + + + +## Next steps + +Now that you've created and tested your first Genkit application, explore more features to build powerful AI-driven applications: + +- [Developer tools](/unified-docs/developer-tools): Set up your local workflow with the Genkit CLI and Dev UI. +- [Generating content](/unified-docs/generating-content): Use Genkit's unified generation API to work with multimodal and structured output across supported models. +- [Creating flows](/unified-docs/creating-flows): Learn about streaming flows, schema customization, deployment options, and more. +- [Tool calling](/unified-docs/tool-calling): Enable your AI models to interact with external systems and APIs. +- [Managing prompts with Dotprompt](/unified-docs/dotprompt): Define flexible prompt templates using `.prompt` files or code. diff --git a/src/content/docs/unified-docs/interrupts.mdx b/src/content/docs/unified-docs/interrupts.mdx new file mode 100644 index 00000000..2b0bff57 --- /dev/null +++ b/src/content/docs/unified-docs/interrupts.mdx @@ -0,0 +1,550 @@ +--- +title: Pause generation using interrupts +description: Learn how to use interrupts in Genkit to pause and resume LLM generation, enabling human-in-the-loop interactions, asynchronous processing, and controlled task completion across JavaScript and Python. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +:::caution[Beta] +This feature of Genkit is in **Beta,** which means it is not yet part of Genkit's stable API. APIs of beta features may change in minor version releases. +::: + +_Interrupts_ are a special kind of [tool](/unified-docs/tool-calling) that can pause the +LLM generation-and-tool-calling loop to return control back to you. 
When +you're ready, you can then _resume_ generation by sending _replies_ that the LLM +processes for further generation. + +The most common uses for interrupts fall into a few categories: + +- **Human-in-the-Loop:** Enabling the user of an interactive AI + to clarify needed information or confirm the LLM's action + before it is completed, providing a measure of safety and confidence. +- **Async Processing:** Starting an asynchronous task that can only be + completed out-of-band, such as sending an approval notification to + a human reviewer or kicking off a long-running background process. +- **Exit from an Autonomous Task:** Providing the model a way + to mark a task as complete, in a workflow that might iterate through + a long series of tool calls. + +## Availability + + + + Interrupts are fully supported in JavaScript with comprehensive APIs for defining, using, and responding to interrupts. + + + Interrupts are not currently available in Go. Use alternative patterns like conditional tool execution or external coordination mechanisms. + + + Interrupts are supported in Python with similar functionality to JavaScript, though with some API differences. + + + +## Before you begin + +All of the examples documented here assume that you have already set up a +project with Genkit dependencies installed. If you want to run the code +examples on this page, first complete the steps in the Getting started guide for your language. + +Before diving too deeply, you should also be familiar with the following +concepts: + +- [Generating content](/unified-docs/generating-content) with AI models +- Genkit's system for [defining input and output schemas](/unified-docs/creating-flows) +- General methods of [tool calling](/unified-docs/tool-calling) + +## Overview of interrupts + +At a high level, this is what an interrupt looks like when +interacting with an LLM: + +1. The calling application prompts the LLM with a request. The prompt includes + a list of tools, including at least one for an interrupt that the LLM + can use to generate a response. +2. The LLM generates either a complete response or a tool call request + in a specific format. To the LLM, an interrupt call looks like any + other tool call. +3. If the LLM calls an interrupt tool, + the Genkit library automatically pauses generation rather than immediately + passing responses back to the model for additional processing. +4. The developer checks whether an interrupt call is made, and performs whatever + task is needed to collect the information needed for the interrupt response. +5. The developer resumes generation by passing an interrupt response to the + model. This action triggers a return to Step 2. + +## Define manual-response interrupts + +The most common kind of interrupt allows the LLM to request clarification from +the user, for example by asking a multiple-choice question. 
+ + + + Use the Genkit instance's `defineInterrupt()` method: + + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), + }); + + const askQuestion = ai.defineInterrupt({ + name: 'askQuestion', + description: 'use this to ask the user a clarifying question', + inputSchema: z.object({ + choices: z.array(z.string()).describe('the choices to display to the user'), + allowOther: z.boolean().optional().describe('when true, allow write-ins'), + }), + outputSchema: z.string(), + }); + ``` + + Note that the `outputSchema` of an interrupt corresponds to the response data + you will provide as opposed to something that will be automatically populated + by a tool function. + + + Interrupts are not currently available in Go. Consider using alternative patterns: + + ```go + // Alternative: Use conditional tool execution + func conditionalTool(ctx context.Context, input ToolInput) (ToolOutput, error) { + // Check conditions and return early if user confirmation needed + if needsConfirmation(input) { + return ToolOutput{ + Status: "NEEDS_CONFIRMATION", + Message: "Please confirm this action", + }, nil + } + + // Proceed with normal execution + return executeAction(input) + } + ``` + + + Use the Genkit instance's `tool()` decorator: + + ```python + from pydantic import BaseModel, Field + + class Questions(BaseModel): + choices: list[str] = Field(description='the choices to display to the user') + allow_other: bool = Field(description='when true, allow write-ins') + + @ai.tool() + def ask_question(input: Questions, ctx) -> str: + """Use this to ask the user a clarifying question""" + ctx.interrupt() + ``` + + Note that the return type annotation of an interrupt corresponds to the response data + you will provide as opposed to something that will be automatically populated + by a tool function. + + + +## Use interrupts + +Interrupts are passed into the `tools` array when generating content, just like +other types of tools. You can pass both normal tools and interrupts to the +same `generate` call: + + + + ### Generate + + ```ts + const response = await ai.generate({ + prompt: "Ask me a movie trivia question.", + tools: [askQuestion], + }); + ``` + + ### definePrompt + + ```ts + const triviaPrompt = ai.definePrompt({ + name: "triviaPrompt", + tools: [askQuestion], + input: { + schema: z.object({ subject: z.string() }), + }, + prompt: "Ask me a trivia question about {{subject}}.", + }); + + const response = await triviaPrompt({ subject: "computer history" }); + ``` + + ### Prompt file + + ```dotprompt + --- + tools: [askQuestion] + input: + schema: + partyType: string + --- + + {{role "system"}} + Use the askQuestion tool if you need to clarify something. + + {{role "user"}} + Help me plan a {{partyType}} party next week. + ``` + + Then you can execute the prompt in your code as follows: + + ```ts + // assuming prompt file is named partyPlanner.prompt + const partyPlanner = ai.prompt("partyPlanner"); + + const response = await partyPlanner({ partyType: "birthday" }); + ``` + + ### Chat + + ```ts + const chat = ai.chat({ + system: "Use the askQuestion tool if you need to clarify something.", + tools: [askQuestion], + }); + + const response = await chat.send("make a plan for my birthday party"); + ``` + + + Interrupts are not available in Go. Use alternative patterns like conditional tool execution or external coordination mechanisms. 
+ + + ```python + interrupted_response = await ai.generate( + prompt='Ask me a movie trivia question.', + tools=['ask_question'], + ) + ``` + + + +Genkit immediately returns a response on receipt of an interrupt tool call. + +## Respond to interrupts + +If you've passed one or more interrupts to your generate call, you +need to check the response for interrupts so that you can handle them: + + + + ```ts + // you can check the 'finishReason' of the response + response.finishReason === 'interrupted'; + // or you can check to see if any interrupt requests are on the response + response.interrupts.length > 0; + ``` + + Responding to an interrupt is done using the `resume` option on a subsequent + `generate` call, making sure to pass in the existing history. Each tool has + a `.respond()` method on it to help construct the response. + + Once resumed, the model re-enters the generation loop, including tool + execution, until either it completes or another interrupt is triggered: + + ```ts + let response = await ai.generate({ + tools: [askQuestion], + system: 'ask clarifying questions until you have a complete solution', + prompt: 'help me plan a backyard BBQ', + }); + + while (response.interrupts.length) { + const answers = []; + // multiple interrupts can be called at once, so we handle them all + for (const question of response.interrupts) { + answers.push( + // use the `respond` method on our tool to populate answers + askQuestion.respond( + question, + // send the tool request input to the user to respond + await askUser(question.toolRequest.input), + ), + ); + } + + response = await ai.generate({ + tools: [askQuestion], + messages: response.messages, + resume: { + respond: answers, + }, + }); + } + + // no more interrupts, we can see the final response + console.log(response.text); + ``` + + + Not applicable - interrupts are not available in Go. + + + ```python + # You can check the 'finish_reason' attribute of the response + if interrupted_response.finish_reason == 'interrupted': + print("Generation interrupted.") + + # Or you can check if any interrupt requests are on the response + if interrupted_response.interrupts and len(interrupted_response.interrupts) > 0: + print(f"Interrupts found: {len(interrupted_response.interrupts)}") + ``` + + Responding to an interrupt is done using the `tool_responses` option on a subsequent + `generate` call, making sure to pass in the existing history. There's a `tool_response` + helper function to help you construct the response. + + Once resumed, the model re-enters the generation loop, including tool + execution, until either it completes or another interrupt is triggered: + + ```python + from genkit.ai import tool_response + + response = await ai.generate( + messages=interrupted_response.messages, + tool_responses=[tool_response(interrupted_response.interrupts[0], 'b')], + tools=['ask_question'], + ) + ``` + + + +## Tools with restartable interrupts + +Another common pattern for interrupts is the need to _confirm_ an action that +the LLM suggests before actually performing it. For example, a payments app +might want the user to confirm certain kinds of transfers. + + + + For this use case, you can use the standard `defineTool` method to add custom + logic around when to trigger an interrupt, and what to do when an interrupt is + _restarted_ with additional metadata. 
+ + ### Define a restartable tool + + Every tool has access to two special helpers in the second argument of its + implementation definition: + + - `interrupt`: when called, this method throws a special kind of exception that + is caught to pause the generation loop. You can provide additional metadata + as an object. + - `resumed`: when a request from an interrupted generation is restarted using + the `{resume: {restart: ...}}` option (see below), this helper contains the + metadata provided when restarting. + + If you were building a payments app, for example, you might want to confirm with + the user before making a transfer exceeding a certain amount: + + ```ts + const transferMoney = ai.defineTool({ + name: 'transferMoney', + description: 'Transfers money between accounts.', + inputSchema: z.object({ + toAccountId: z.string().describe('the account id of the transfer destination'), + amount: z.number().describe('the amount in integer cents (100 = $1.00)'), + }), + outputSchema: z.object({ + status: z.string().describe('the outcome of the transfer'), + message: z.string().optional(), + }) + }, async (input, {context, interrupt, resumed}) => { + // if the user rejected the transaction + if (resumed?.status === "REJECTED") { + return {status: 'REJECTED', message: 'The user rejected the transaction.'}; + } + // trigger an interrupt to confirm if amount > $100 + if (resumed?.status !== "APPROVED" && input.amount > 10000) { + interrupt({ + message: "Please confirm sending an amount > $100.", + }); + } + // complete the transaction if not interrupted + return doTransfer(input); + }); + ``` + + In this example, on first execution (when `resumed` is undefined), the tool + checks to see if the amount exceeds $100, and triggers an interrupt if so. On + second execution, it looks for a status in the new metadata provided and + performs the transfer or returns a rejection response, depending on whether it + is approved or rejected. + + ### Restart tools after interruption + + Interrupt tools give you full control over: + + 1. When an initial tool request should trigger an interrupt. + 2. When and whether to resume the generation loop. + 3. What additional information to provide to the tool when resuming. + + In the example shown in the previous section, the application might ask the user + to confirm the interrupted request to make sure the transfer amount is okay: + + ```ts + let response = await ai.generate({ + tools: [transferMoney], + prompt: "Transfer $1000 to account ABC123", + }); + + while (response.interrupts.length) { + const confirmations = []; + // multiple interrupts can be called at once, so we handle them all + for (const interrupt of response.interrupts) { + confirmations.push( + // use the 'restart' method on our tool to provide `resumed` metadata + transferMoney.restart( + interrupt, + // send the tool request input to the user to respond. assume that this + // returns `{status: "APPROVED"}` or `{status: "REJECTED"}` + await requestConfirmation(interrupt.toolRequest.input) + ) + ); + } + + response = await ai.generate({ + tools: [transferMoney], + messages: response.messages, + resume: { + restart: confirmations, + } + }) + } + + // no more interrupts, we can see the final response + console.log(response.text); + ``` + + + Not applicable - interrupts are not available in Go. 
Consider implementing confirmation logic within your tools: + + ```go + func transferMoney(ctx context.Context, input TransferInput) (TransferOutput, error) { + // Implement confirmation logic within the tool + if input.Amount > 10000 && !input.Confirmed { + return TransferOutput{ + Status: "NEEDS_CONFIRMATION", + Message: "Please confirm transfer amount > $100", + RequiresConfirmation: true, + }, nil + } + + // Proceed with transfer if confirmed or amount is small + return executeTransfer(input) + } + ``` + + + Similar patterns are available in Python, though the specific APIs may differ. Consult the Python documentation for the most current implementation details. + + + +## Best practices + +### When to use interrupts + + + + - **User confirmation**: For actions that have significant consequences (payments, deletions, etc.) + - **Missing information**: When the LLM needs clarification to proceed + - **Async operations**: For long-running tasks that need to complete out-of-band + - **Safety checks**: To add human oversight to autonomous AI workflows + + + Since interrupts are not available, consider these alternatives: + - **Conditional tools**: Return status codes that indicate when confirmation is needed + - **Multi-step flows**: Break complex operations into smaller, confirmable steps + - **External coordination**: Use external systems to manage approval workflows + + + - **User confirmation**: For actions that have significant consequences + - **Missing information**: When the LLM needs clarification to proceed + - **Async operations**: For long-running tasks that need to complete out-of-band + - **Safety checks**: To add human oversight to autonomous AI workflows + + + +### Error handling + + + + Always handle the case where interrupts might not be responded to: + + ```ts + let response = await ai.generate({ + tools: [askQuestion], + prompt: 'help me plan a party', + }); + + let maxRetries = 3; + let retryCount = 0; + + while (response.interrupts.length && retryCount < maxRetries) { + try { + // Handle interrupts... 
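        // Build a response for each pending interrupt before resuming, as in
        // the "Respond to interrupts" example earlier on this page (askUser is
        // your own helper for collecting the user's answer).
        const answers = [];
        for (const question of response.interrupts) {
          answers.push(askQuestion.respond(question, await askUser(question.toolRequest.input)));
        }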
+ response = await ai.generate({ + tools: [askQuestion], + messages: response.messages, + resume: { respond: answers }, + }); + retryCount++; + } catch (error) { + console.error('Error handling interrupt:', error); + break; + } + } + ``` + + + Implement proper error handling in your conditional tools: + + ```go + func handleConditionalTool(ctx context.Context, input ToolInput) (ToolOutput, error) { + if needsConfirmation(input) { + return ToolOutput{ + Status: "NEEDS_CONFIRMATION", + Message: "Please confirm this action", + }, nil + } + + result, err := executeAction(input) + if err != nil { + return ToolOutput{}, fmt.Errorf("action failed: %w", err) + } + + return result, nil + } + ``` + + + Always handle the case where interrupts might not be responded to: + + ```python + try: + response = await ai.generate( + messages=interrupted_response.messages, + tool_responses=[tool_response(interrupted_response.interrupts[0], user_input)], + tools=['ask_question'], + ) + except Exception as e: + print(f"Error handling interrupt: {e}") + # Handle error appropriately + ``` + + + +## Next steps + +- Learn about [tool calling](/unified-docs/tool-calling) to understand the foundation that interrupts build upon +- Explore [creating flows](/unified-docs/creating-flows) to build complex AI workflows that incorporate interrupts +- See [developer tools](/unified-docs/developer-tools) for testing and debugging interrupt-enabled applications +- Check out [generating content](/unified-docs/generating-content) for understanding the generation loop that interrupts can pause diff --git a/src/content/docs/unified-docs/mcp-server.mdx b/src/content/docs/unified-docs/mcp-server.mdx new file mode 100644 index 00000000..7a4edaff --- /dev/null +++ b/src/content/docs/unified-docs/mcp-server.mdx @@ -0,0 +1,260 @@ +--- +title: "Genkit MCP Server" +description: "Integrate Genkit with MCP-aware IDEs and tools." +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +The Genkit MCP (Model Context Protocol) Server enables seamless integration of your Genkit projects with various development environments and AI tools. By exposing Genkit functionalities through the Model Context Protocol, it allows LLM agents and IDEs to discover, interact with, and monitor your Genkit flows and other components. + +## What is the MCP Server? + +The Genkit MCP Server acts as a bridge between your Genkit application and external tools that understand the Model Context Protocol. This allows these tools to: + +- **Discover Genkit flows:** Tools can list all available flows defined in your project, along with their input schemas, enabling them to understand how to call them. +- **Run Genkit flows:** External tools can execute your Genkit flows, providing inputs and receiving outputs. +- **Access trace details:** The server allows for retrieval and analysis of execution traces for your Genkit flows, providing insights into their performance and behavior. +- **Look up Genkit documentation:** Integrated tools can access Genkit documentation directly through the MCP server, aiding in development and debugging. + +## Getting Started + +To use the Genkit MCP Server, you first need to have the Genkit CLI installed. If you haven't already, install it globally: + +```bash +npm install -g genkit-cli +``` + +:::note +The examples in this guide assume you have installed the Genkit CLI globally using `npm install -g genkit-cli`. 
If you have installed the Genkit CLI locally in your project instead, you'll need to prefix all `genkit` commands with `npx` (e.g., use `npx genkit mcp` instead of `genkit mcp`).
:::

### Configuring the MCP Server

The Genkit MCP Server is typically configured within an MCP-aware IDE or tool. The configuration details often include:

- **`serverName`**: A unique name for the server (e.g., "genkit").
- **`command`**: The command to execute the MCP server (e.g., `genkit`).
- **`args`**: Arguments to pass to the command (e.g., `["mcp"]` to run the Genkit MCP server).
- **`cwd`**: The current working directory where the command should be executed.
- **`timeout`**: The maximum time (in milliseconds) the server is allowed to take to start.
- **`trust`**: A boolean indicating whether to automatically trust the server. Setting this to `true` allows tools to execute commands from this server without requiring explicit user confirmation for each action.

## Integration with AI Development Tools

<Tabs>
  <TabItem label="Gemini CLI">
    To integrate the Genkit MCP Server with the Gemini CLI, you can add a configuration entry to your `.gemini/settings.json` file. This file is typically located in your project root or your user's home directory.

    ```json
    {
      "mcpServers": {
        "genkit": {
          "command": "genkit",
          "args": ["mcp"],
          "cwd": "<path-to-your-genkit-project>",
          "timeout": 30000,
          "trust": false
        }
      }
    }
    ```

    After adding this configuration, restart your Gemini CLI session for the changes to take effect. You can then interact with your Genkit flows and tools directly from the Gemini CLI.

    ### Video Tutorial

    Watch the video tutorial that accompanies this guide to see how to set up and use the Genkit MCP Server with the Gemini CLI.
  </TabItem>
  <TabItem label="Cursor">
    Cursor AI IDE provides a built-in MCP client that supports an arbitrary number of MCP servers.

    To add the Genkit MCP Server in Cursor:

    1. Open Cursor Settings by navigating to **File > Preferences > Cursor Settings** or by using the command palette.
    2. Select the **MCP** option in the settings.
    3. Click on the **"+ Add New MCP Server"** button.
    4. Provide the configuration details. You can set the `Type` to `stdio` and the `Command` to `genkit mcp`. Remember to specify the correct `cwd` if your Genkit project is not in the default directory.
    5. Configuration can be stored globally (`~/.cursor/mcp.json`) or locally (project-specific, `.cursor/mcp.json`).

    Once configured, Cursor's AI assistant will automatically invoke the server's tools when needed.
  </TabItem>
  <TabItem label="Claude Code">
    Claude Code functions as both an MCP server and client and can connect to external tools via MCP.

    To add the Genkit MCP Server to Claude Code:

    1. You can configure MCP servers in Claude Code through:
       - Project configuration (available when running Claude Code in that directory).
       - Global configuration (available in all projects).
       - A checked-in `.mcp.json` file (shared with everyone in the project).

    2. From the command line, use the `claude mcp add` command:

       ```bash
       claude mcp add --transport stdio genkit genkit mcp --cwd <path-to-your-genkit-project> --scope <scope>
       ```

       - Replace `<path-to-your-genkit-project>` with the actual path to your Genkit project.
       - Choose a `<scope>`: `local` (default, only available to you in the current project), `project` (shared with everyone via `.mcp.json`), or `user` (available to you across all projects).

    Claude Code will then be able to leverage Genkit's functionalities.
  </TabItem>
  <TabItem label="Windsurf">
    Windsurf, an AI-enhanced IDE built on VS Code, also supports MCP servers to extend its capabilities.

    To set up the Genkit MCP Server in Windsurf:

    1. Open Windsurf Settings by clicking the **Windsurf - Settings** button (bottom right) or by hitting `Cmd+Shift+P` (Mac) / `Ctrl+Shift+P` (Windows/Linux) and searching for "Open Windsurf Settings".
    2. Navigate to the **Cascade** section in **Advanced Settings** and look for the **MCP** option to enable it.
    3. You can add a new MCP server directly through the settings UI or by manually editing the `~/.codeium/windsurf/mcp_config.json` file.
    4. Provide the `stdio` transport command: `genkit mcp`. Ensure the working directory (`cwd`) is correctly set to your Genkit project.

    After configuration, Windsurf's AI assistant (Cascade) can interact with your Genkit components.
  </TabItem>
  <TabItem label="Cline">
    Cline, an AI assistant for your CLI and Editor, can also extend its capabilities through custom MCP tools.

    To configure the Genkit MCP Server in Cline:

    1. Click the **"MCP Servers"** icon in the top navigation bar of the Cline pane.
    2. Select the **"Installed"** tab.
    3. Click the **"Configure MCP Servers"** button at the bottom of the pane.
    4. You can then add a new server configuration using JSON. An example configuration would be:

       ```json
       {
         "mcpServers": {
           "genkit": {
             "command": "genkit",
             "args": ["mcp"],
             "cwd": "<path-to-your-genkit-project>",
             "timeout": 30000,
             "trust": false
           }
         }
       }
       ```

       The settings for all installed MCP servers are located in the `cline_mcp_settings.json` file.

    5. Alternatively, you can ask Cline directly to "add a tool" and it can guide you through creating and installing a new MCP server.

    Once configured, Cline will automatically detect and leverage the tools provided by the Genkit MCP Server.
  </TabItem>
</Tabs>
+ +## Using the MCP Server + +Once configured, your MCP-aware tool can interact with the Genkit MCP Server. Here are some of the available operations: + +### Look up Genkit Documentation + +You can use the `lookup_genkit_docs` tool to retrieve documentation for the Genkit AI framework. You can specify a language and particular files to look up. + +**Example:** +To get a list of available documentation files for JavaScript: + +``` +@genkit:lookup_genkit_docs { "language": "js" } +``` + +### List Genkit Flows + +The `list_flows` tool allows you to discover all defined Genkit flows in your project and inspect their input schemas. + +**Example:** + +``` +@genkit:list_flows {} +``` + +This will return a list of flows with their descriptions and input schemas, similar to: + +``` +- Flow name: recipeGeneratorFlow + Input schema: {"type":"object","properties":{"ingredient":{"type":"string"},"dietaryRestrictions":{"type":"string"}},"required":["ingredient","dietaryRestrictions"]} +``` + +### Run Genkit Flows + +You can execute a specific Genkit flow using the `run_flow` tool. You'll need to provide the `flowName` and any required `input` as a JSON string conforming to the flow's input schema. + +**Example:** +To run a `recipeGeneratorFlow` with specific ingredients and dietary restrictions: + +``` +@genkit:run_flow { "flowName": "recipeGeneratorFlow", "input": "{\"ingredient\": \"avocado\", \"dietaryRestrictions\": \"vegetarian\"}" } +``` + +The output will be the result of the flow execution, for example: + +```json +{ + "cookTime": "5 minutes", + "description": "A quick and easy vegetarian recipe featuring creamy avocado.", + "ingredients": [ + "1 ripe avocado", + "1/4 cup chopped red onion", + "1/4 cup chopped cilantro", + "1 tablespoon lime juice", + "1/4 teaspoon salt", + "1/4 teaspoon black pepper" + ], + "instructions": [ + "Halve the avocado and remove the pit.", + "Scoop the avocado flesh into a bowl.", + "Add the red onion, cilantro, lime juice, salt, and pepper.", + "Mash everything together with a fork until it is mostly smooth but still has some chunks.", + "Stir in the red onion, cilantro, lime juice, salt, and pepper.", + "Serve immediately with tortilla chips or as a topping for tacos or salads." + ], + "prepTime": "5 minutes", + "servings": 1, + "title": "Simple Avocado Mash", + "tips": [ + "For a spicier dish, add a pinch of cayenne pepper.", + "If you don't have fresh cilantro, you can use parsley instead." + ] +} +``` + +### Get Trace Details + +After running a flow, you can retrieve its detailed execution trace using the `get_trace` tool and the `traceId` returned from the flow execution. + +**Example:** + +``` +@genkit:get_trace { "traceId": "ecf38e20f418b2964f7ab472b799" } +``` + +The output will provide a breakdown of the trace, including details about each span, such as input, output, and execution time. + +## Local Development and Documentation Bundle + +The Genkit MCP Server includes a pre-built documentation bundle. If you need to update this bundle or work with custom documentation, the server can download and serve an experimental bundle from `http://genkit.dev/docs-bundle-experimental.json`. + +The documentation bundle is stored locally in `~/.genkit/docs//bundle.json`. 
## Next steps

- Learn about [developer tools](/unified-docs/developer-tools) for more ways to work with Genkit locally
- Explore [creating flows](/unified-docs/creating-flows) to build flows that can be accessed through the MCP server
- See [observability and monitoring](/unified-docs/observability-monitoring) to understand how traces work in Genkit
diff --git a/src/content/docs/unified-docs/multi-agent-systems.mdx b/src/content/docs/unified-docs/multi-agent-systems.mdx
new file mode 100644
index 00000000..52d2c22e
--- /dev/null
+++ b/src/content/docs/unified-docs/multi-agent-systems.mdx
@@ -0,0 +1,698 @@
---
title: Building multi-agent systems
description: Learn how to build multi-agent systems in Genkit by delegating tasks to specialized agents, addressing challenges of complex agentic workflows across different languages.
---

import { Tabs, TabItem } from '@astrojs/starlight/components';

:::caution[Beta]
This feature of Genkit is in **Beta**, which means it is not yet part of Genkit's stable API. APIs of beta features may change in minor version releases.
:::

A powerful application of large language models is LLM-powered agents. An agent is a system that can carry out complex tasks by planning how to break them into smaller ones, and (with the help of [tool calling](/unified-docs/tool-calling)) executing tasks that interact with external resources such as databases or even physical devices.

Multi-agent systems take this concept further by using specialized agents that can delegate tasks to each other, creating more sophisticated and maintainable AI workflows.

## Availability and Approach

<Tabs syncKey="language">
  <TabItem label="JavaScript">
    JavaScript provides built-in multi-agent system support through Genkit's prompt-as-tool architecture. You can define specialized agents as prompts and use them as tools in other agents, creating hierarchical delegation patterns.

    Features include:
    - Prompt-based agent definitions
    - Automatic tool delegation between agents
    - Specialized agent contexts and capabilities
    - Integration with chat sessions and flows
    - Built-in orchestration patterns
  </TabItem>
  <TabItem label="Go">
    Go doesn't have built-in multi-agent system APIs. You need to implement agent coordination manually by:
    - Creating separate functions or flows for each agent
    - Implementing your own delegation logic
    - Managing agent state and context manually
    - Building coordination patterns using flows and tools
  </TabItem>
  <TabItem label="Python">
    Python doesn't have built-in multi-agent system APIs. You need to implement agent coordination manually by:
    - Creating separate functions or flows for each agent
    - Implementing your own delegation logic
    - Managing agent state and context manually
    - Building coordination patterns using flows and tools
  </TabItem>
</Tabs>

## Why use multi-agent systems?

As you build more complex AI applications, you start to run into some problems with single-agent architectures:

- **Tool overload**: The more tools you add, the more you stretch the model's ability to consistently and correctly employ the right tool for the job.
- **Context switching**: Some tasks might best be served through a more focused back-and-forth between the user and the agent, rather than by a single tool call.
- **Specialized behavior**: Some tasks might benefit from a specialized prompt. For example, if your agent is responding to an unhappy customer, you might want its tone to be more business-like, whereas the agent that greets the customer initially can have a more friendly and lighthearted tone.
+ +Multi-agent systems address these issues by creating specialized agents that can delegate tasks to each other. + +## Single agent example + +Let's start with a simple customer service agent to understand the progression to multi-agent systems: + + + + Here are some excerpts from a very simple customer service agent built using a single prompt and several tools: + + ```typescript + const menuLookupTool = ai.defineTool( + { + name: 'menuLookupTool', + description: 'use this tool to look up the menu for a given date', + inputSchema: z.object({ + date: z.string().describe('the date to look up the menu for'), + }), + outputSchema: z.string().describe('the menu for a given date'), + }, + async (input) => { + // Retrieve the menu from a database, website, etc. + // ... + }, + ); + + const reservationTool = ai.defineTool( + { + name: 'reservationTool', + description: 'use this tool to try to book a reservation', + inputSchema: z.object({ + partySize: z.coerce.number().describe('the number of guests'), + date: z.string().describe('the date to book for'), + }), + outputSchema: z + .string() + .describe( + "true if the reservation was successfully booked and false if there's" + + ' no table available for the requested time', + ), + }, + async (input) => { + // Access your database to try to make the reservation. + // ... + }, + ); + + const chat = ai.chat({ + model: googleAI.model('gemini-2.5-flash'), + system: + "You are an AI customer service agent for Pavel's Cafe. Use the tools " + + 'available to you to help the customer. If you cannot help the ' + + 'customer with the available tools, politely explain so.', + tools: [menuLookupTool, reservationTool], + }); + ``` + + + In Go, you would implement a single agent using flows and tools: + + ```go + func menuLookupTool(ctx context.Context, input MenuLookupInput) (string, error) { + // Retrieve the menu from a database, website, etc. + return getMenuForDate(input.Date) + } + + func reservationTool(ctx context.Context, input ReservationInput) (string, error) { + // Access your database to try to make the reservation + success, err := makeReservation(input.PartySize, input.Date) + if err != nil { + return "", err + } + if success { + return "true", nil + } + return "false", nil + } + + func customerServiceAgent(ctx context.Context, userInput string) (string, error) { + // Build a prompt that includes available tools and user input + prompt := fmt.Sprintf(`You are an AI customer service agent for Pavel's Cafe. + Available tools: menuLookup, makeReservation + User: %s + + How can I help you today?`, userInput) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(prompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithTools(menuLookupTool, reservationTool), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + ``` + + + In Python, you would implement a single agent using flows and tools: + + ```python + @ai.tool() + def menu_lookup_tool(date: str, ctx) -> str: + """Use this tool to look up the menu for a given date""" + # Retrieve the menu from a database, website, etc. 
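        # (get_menu_for_date is a hypothetical helper standing in for your own data access)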
+ return get_menu_for_date(date) + + @ai.tool() + def reservation_tool(party_size: int, date: str, ctx) -> str: + """Use this tool to try to book a reservation""" + # Access your database to try to make the reservation + success = make_reservation(party_size, date) + return "true" if success else "false" + + @ai.flow() + async def customer_service_agent(user_input: str, ctx): + """AI customer service agent for Pavel's Cafe""" + + response = await ai.generate( + prompt=f"""You are an AI customer service agent for Pavel's Cafe. + Use the tools available to you to help the customer. + + User: {user_input}""", + tools=['menu_lookup_tool', 'reservation_tool'], + model=google_genai_name('gemini-2.5-flash'), + ) + + return response.text + ``` + + + +## Multi-agent architecture + + + + One approach you can use to deal with the issues that arise when building + complex agents is to create many specialized agents and use a general purpose + agent to delegate tasks to them. Genkit supports this architecture by allowing + you to specify prompts as tools. Each prompt represents a single specialized + agent, with its own set of tools available to it, and those agents are in turn + available as tools to your single orchestration agent, which is the primary + interface with the user. + + Here's what an expanded version of the previous example might look like as a + multi-agent system: + + ```typescript + // Define a prompt that represents a specialist agent + const reservationAgent = ai.definePrompt({ + name: 'reservationAgent', + description: 'Reservation Agent can help manage guest reservations', + tools: [reservationTool, reservationCancelationTool, reservationListTool], + system: 'Help guests make and manage reservations', + }); + + // Or load agents from .prompt files + const menuInfoAgent = ai.prompt('menuInfoAgent'); + const complaintAgent = ai.prompt('complaintAgent'); + + // The triage agent is the agent that users interact with initially + const triageAgent = ai.definePrompt({ + name: 'triageAgent', + description: 'Triage Agent', + tools: [reservationAgent, menuInfoAgent, complaintAgent], + system: `You are an AI customer service agent for Pavel's Cafe. + Greet the user and ask them how you can help. If appropriate, transfer to an + agent that can better handle the request. If you cannot help the customer with + the available tools, politely explain so.`, + }); + + // Start a chat session, initially with the triage agent + const chat = ai.chat(triageAgent); + ``` + + + In Go, you can implement multi-agent patterns by creating separate flows for each agent and implementing delegation logic: + + ```go + // Define specialized agent functions + func reservationAgent(ctx context.Context, userInput string) (string, error) { + prompt := fmt.Sprintf(`You are a reservation specialist for Pavel's Cafe. + Help guests make and manage reservations. + + User: %s`, userInput) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(prompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithTools(reservationTool, reservationCancelationTool, reservationListTool), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + + func menuInfoAgent(ctx context.Context, userInput string) (string, error) { + prompt := fmt.Sprintf(`You are a menu information specialist for Pavel's Cafe. + Help guests with menu questions and dietary information. 
+ + User: %s`, userInput) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(prompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithTools(menuLookupTool, allergyInfoTool), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + + func complaintAgent(ctx context.Context, userInput string) (string, error) { + prompt := fmt.Sprintf(`You are a customer service specialist for Pavel's Cafe. + Handle customer complaints with empathy and professionalism. + + User: %s`, userInput) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(prompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ai.WithTools(refundTool, managerContactTool), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + + // Triage agent that delegates to specialists + func triageAgent(ctx context.Context, userInput string) (string, error) { + // First, determine which agent should handle this request + classificationPrompt := fmt.Sprintf(`Classify this customer request: + User: %s + + Categories: + - reservation: booking, canceling, or modifying reservations + - menu: questions about food, ingredients, or menu items + - complaint: issues, problems, or complaints + - general: greeting or general questions + + Respond with just the category name.`, userInput) + + classResp, err := genkit.Generate(ctx, g, + ai.WithPrompt(classificationPrompt), + ai.WithModelName("googleai/gemini-2.5-flash"), + ) + if err != nil { + return "", err + } + + category := strings.ToLower(strings.TrimSpace(classResp.Text())) + + // Delegate to the appropriate specialist agent + switch category { + case "reservation": + return reservationAgent(ctx, userInput) + case "menu": + return menuInfoAgent(ctx, userInput) + case "complaint": + return complaintAgent(ctx, userInput) + default: + // Handle general queries directly + return generalGreeting(ctx, userInput) + } + } + ``` + + + In Python, you can implement multi-agent patterns by creating separate flows for each agent: + + ```python + @ai.flow() + async def reservation_agent(user_input: str, ctx): + """Reservation specialist for Pavel's Cafe""" + response = await ai.generate( + prompt=f"""You are a reservation specialist for Pavel's Cafe. + Help guests make and manage reservations. + + User: {user_input}""", + tools=['reservation_tool', 'reservation_cancelation_tool', 'reservation_list_tool'], + model=google_genai_name('gemini-2.5-flash'), + ) + return response.text + + @ai.flow() + async def menu_info_agent(user_input: str, ctx): + """Menu information specialist for Pavel's Cafe""" + response = await ai.generate( + prompt=f"""You are a menu information specialist for Pavel's Cafe. + Help guests with menu questions and dietary information. + + User: {user_input}""", + tools=['menu_lookup_tool', 'allergy_info_tool'], + model=google_genai_name('gemini-2.5-flash'), + ) + return response.text + + @ai.flow() + async def complaint_agent(user_input: str, ctx): + """Customer service specialist for Pavel's Cafe""" + response = await ai.generate( + prompt=f"""You are a customer service specialist for Pavel's Cafe. + Handle customer complaints with empathy and professionalism. 
+ + User: {user_input}""", + tools=['refund_tool', 'manager_contact_tool'], + model=google_genai_name('gemini-2.5-flash'), + ) + return response.text + + @ai.flow() + async def triage_agent(user_input: str, ctx): + """Triage agent that delegates to specialists""" + + # First, classify the request + classification_response = await ai.generate( + prompt=f"""Classify this customer request: + User: {user_input} + + Categories: + - reservation: booking, canceling, or modifying reservations + - menu: questions about food, ingredients, or menu items + - complaint: issues, problems, or complaints + - general: greeting or general questions + + Respond with just the category name.""", + model=google_genai_name('gemini-2.5-flash'), + ) + + category = classification_response.text.strip().lower() + + # Delegate to the appropriate specialist agent + if category == "reservation": + return await reservation_agent(user_input, ctx) + elif category == "menu": + return await menu_info_agent(user_input, ctx) + elif category == "complaint": + return await complaint_agent(user_input, ctx) + else: + # Handle general queries directly + return await general_greeting(user_input, ctx) + ``` + + + +## Agent coordination patterns + + + + ### Hierarchical delegation + + The most common pattern is hierarchical delegation, where a triage agent routes requests to specialized agents: + + ```typescript + const triageAgent = ai.definePrompt({ + name: 'triageAgent', + description: 'Routes customer requests to appropriate specialists', + tools: [reservationAgent, menuAgent, supportAgent], + system: `You are a customer service triage agent. + Analyze the customer's request and delegate to the most appropriate specialist. + Always explain to the customer which specialist you're connecting them with.`, + }); + ``` + + ### Collaborative agents + + Agents can also work together on complex tasks: + + ```typescript + const researchAgent = ai.definePrompt({ + name: 'researchAgent', + description: 'Researches information for complex queries', + tools: [webSearchTool, databaseQueryTool], + system: 'Research and gather information to answer complex questions.', + }); + + const analysisAgent = ai.definePrompt({ + name: 'analysisAgent', + description: 'Analyzes research data and provides insights', + tools: [researchAgent, calculatorTool], + system: 'Analyze research data and provide clear, actionable insights.', + }); + ``` + + ### Sequential workflows + + You can create agents that work in sequence: + + ```typescript + const orderProcessingFlow = ai.defineFlow({ + name: 'orderProcessingFlow', + inputSchema: z.object({ order: z.string() }), + outputSchema: z.string(), + }, async (input) => { + // Step 1: Validate order + const validation = await validationAgent({ order: input.order }); + + if (!validation.isValid) { + return validation.errorMessage; + } + + // Step 2: Process payment + const payment = await paymentAgent({ + order: input.order, + validatedData: validation.data + }); + + // Step 3: Fulfill order + const fulfillment = await fulfillmentAgent({ + order: input.order, + paymentConfirmation: payment.confirmation + }); + + return fulfillment.trackingNumber; + }); + ``` + + + ### Hierarchical delegation + + Implement delegation through function routing: + + ```go + type AgentRouter struct { + agents map[string]func(context.Context, string) (string, error) + } + + func NewAgentRouter() *AgentRouter { + return &AgentRouter{ + agents: map[string]func(context.Context, string) (string, error){ + "reservation": reservationAgent, + "menu": 
menuInfoAgent, + "complaint": complaintAgent, + }, + } + } + + func (r *AgentRouter) Route(ctx context.Context, category, userInput string) (string, error) { + agent, exists := r.agents[category] + if !exists { + return "I'm sorry, I don't have a specialist for that type of request.", nil + } + + return agent(ctx, userInput) + } + ``` + + ### Sequential workflows + + Use flows to coordinate multiple agents: + + ```go + func orderProcessingFlow(ctx context.Context, order OrderInput) (string, error) { + // Step 1: Validate order + validation, err := validationAgent(ctx, order.Details) + if err != nil { + return "", err + } + + if !validation.IsValid { + return validation.ErrorMessage, nil + } + + // Step 2: Process payment + payment, err := paymentAgent(ctx, PaymentInput{ + Order: order.Details, + ValidatedData: validation.Data, + }) + if err != nil { + return "", err + } + + // Step 3: Fulfill order + fulfillment, err := fulfillmentAgent(ctx, FulfillmentInput{ + Order: order.Details, + PaymentConfirmation: payment.Confirmation, + }) + if err != nil { + return "", err + } + + return fulfillment.TrackingNumber, nil + } + ``` + + + ### Hierarchical delegation + + Implement delegation through flow routing: + + ```python + class AgentRouter: + def __init__(self): + self.agents = { + "reservation": reservation_agent, + "menu": menu_info_agent, + "complaint": complaint_agent, + } + + async def route(self, category: str, user_input: str, ctx): + agent = self.agents.get(category) + if not agent: + return "I'm sorry, I don't have a specialist for that type of request." + + return await agent(user_input, ctx) + + router = AgentRouter() + + @ai.flow() + async def triage_with_routing(user_input: str, ctx): + # Classify the request + classification = await classify_request(user_input, ctx) + + # Route to appropriate agent + return await router.route(classification, user_input, ctx) + ``` + + ### Sequential workflows + + Use flows to coordinate multiple agents: + + ```python + @ai.flow() + async def order_processing_flow(order_details: str, ctx): + # Step 1: Validate order + validation = await validation_agent(order_details, ctx) + + if not validation.get('is_valid'): + return validation.get('error_message') + + # Step 2: Process payment + payment = await payment_agent({ + 'order': order_details, + 'validated_data': validation.get('data') + }, ctx) + + # Step 3: Fulfill order + fulfillment = await fulfillment_agent({ + 'order': order_details, + 'payment_confirmation': payment.get('confirmation') + }, ctx) + + return fulfillment.get('tracking_number') + ``` + + + +## Best practices + +### Agent design principles + + + + - **Single responsibility**: Each agent should have a clear, focused purpose + - **Clear interfaces**: Define clear input/output schemas for agent communication + - **Graceful delegation**: Always explain to users when transferring between agents + - **Error handling**: Implement fallback strategies when specialist agents fail + - **Context preservation**: Maintain conversation context across agent transfers + + + - **Modular design**: Keep agent functions focused and composable + - **Error propagation**: Handle errors gracefully and provide meaningful feedback + - **State management**: Carefully manage state between agent calls + - **Resource efficiency**: Avoid unnecessary agent calls through smart routing + - **Testing**: Test individual agents and coordination logic separately + + + - **Flow composition**: Use flows to create reusable agent patterns + - **Type safety**: Use proper type 
hints for agent inputs and outputs + - **Async patterns**: Leverage async/await for efficient agent coordination + - **Error handling**: Implement comprehensive error handling and recovery + - **Monitoring**: Add logging and metrics to track agent performance + + + +### Performance considerations + + + + - **Minimize agent hops**: Avoid unnecessary delegation chains + - **Cache agent responses**: Cache responses for repeated queries + - **Parallel execution**: Use Promise.all() for independent agent calls + - **Context size**: Keep agent contexts focused to reduce token usage + - **Tool selection**: Provide only relevant tools to each agent + + + - **Concurrent execution**: Use goroutines for parallel agent processing + - **Connection pooling**: Reuse connections for agent communications + - **Memory management**: Be mindful of memory usage in long-running agent systems + - **Timeout handling**: Implement timeouts for agent calls + - **Resource limits**: Set appropriate limits on agent execution + + + - **Async coordination**: Use asyncio for efficient agent orchestration + - **Resource pooling**: Pool expensive resources across agents + - **Memory optimization**: Monitor memory usage in complex agent workflows + - **Caching strategies**: Implement intelligent caching for agent responses + - **Load balancing**: Distribute agent workloads appropriately + + + +### Security and safety + + + + - **Agent isolation**: Ensure agents can only access their designated tools + - **Input validation**: Validate all inputs before passing between agents + - **Permission boundaries**: Define clear permission boundaries for each agent + - **Audit trails**: Log agent interactions for debugging and compliance + - **Rate limiting**: Implement rate limiting to prevent agent abuse + + + - **Access control**: Implement proper access controls for agent functions + - **Input sanitization**: Sanitize all inputs to prevent injection attacks + - **Resource limits**: Set limits on agent resource consumption + - **Logging**: Implement comprehensive logging for agent activities + - **Validation**: Validate agent outputs before using them + + + - **Flow security**: Secure flow execution and data passing + - **Input validation**: Validate all agent inputs and outputs + - **Access patterns**: Control access to sensitive operations + - **Monitoring**: Monitor agent behavior for anomalies + - **Sandboxing**: Consider sandboxing for untrusted agent code + + + +## Next steps + +- Learn about [tool calling](/unified-docs/tool-calling) to understand how agents interact with external systems +- Explore [chat sessions](/unified-docs/chat-sessions) to build conversational multi-agent experiences +- See [creating flows](/unified-docs/creating-flows) for building complex agent workflows +- Check out [context](/unified-docs/context) for managing information flow between agents +- Review [developer tools](/unified-docs/developer-tools) for testing and debugging multi-agent systems diff --git a/src/content/docs/unified-docs/observability-monitoring.mdx b/src/content/docs/unified-docs/observability-monitoring.mdx new file mode 100644 index 00000000..f4585667 --- /dev/null +++ b/src/content/docs/unified-docs/observability-monitoring.mdx @@ -0,0 +1,529 @@ +--- +title: Observability and monitoring +description: Learn about Genkit's observability features, including tracing, metrics collection, logging, and monitoring capabilities across JavaScript, Go, and Python. 
+--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Genkit provides robust observability features to help you understand and debug your AI applications. These capabilities include tracing, metrics collection, logging, and monitoring, with different levels of support across languages. + +## Availability and Approach + + + + JavaScript provides comprehensive observability features including: + - Automatic OpenTelemetry tracing and metrics + - Built-in Developer UI for trace inspection + - Centralized logging system with configurable levels + - Firebase Genkit Monitoring integration + - Export to custom observability tools + + + Go provides core observability features including: + - Full OpenTelemetry instrumentation + - Developer UI trace inspection + - Google Cloud Operations Suite integration + - Trace store for development debugging + - Custom telemetry export capabilities + + + Python has basic observability features: + - Limited OpenTelemetry support + - Basic logging capabilities + - Framework-specific monitoring (e.g., Flask integration) + - Manual instrumentation may be required + + + +## Automatic tracing and metrics + + + + Genkit automatically collects traces and metrics without requiring explicit configuration, allowing you to observe and debug your Genkit code's behavior in the Developer UI. Genkit stores these traces, enabling you to analyze your Genkit flows step-by-step with detailed input/output logging and statistics. + + ```typescript + import { genkit } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [googleAI()], + // Tracing is automatically enabled + }); + + // All flows, tools, and generation calls are automatically traced + const myFlow = ai.defineFlow({ + name: 'myFlow', + inputSchema: z.string(), + outputSchema: z.string(), + }, async (input) => { + // This entire flow execution will be traced + const result = await ai.generate({ + prompt: `Process this: ${input}`, + }); + return result.text; + }); + ``` + + In production, Genkit can export traces and metrics to Firebase Genkit Monitoring for further analysis. + + + Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/) and provides hooks to export telemetry data. The trace store feature lets you inspect your traces for your flow runs in the Genkit Developer UI. + + ```go + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + + // Initialize with Google Cloud plugin for telemetry export + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{}), + ) + if err != nil { + panic(err) + } + + // All flows and generation calls are automatically traced + flow := genkit.DefineFlow(g, "myFlow", + func(ctx context.Context, input string) (string, error) { + // This entire flow execution will be traced + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Process this: " + input), + ) + if err != nil { + return "", err + } + return resp.Text(), nil + }, + ) + } + ``` + + This feature is enabled whenever you run a Genkit flow in a dev environment (such as when using `genkit start` or `genkit flow:run`). + + + Python has limited built-in observability features. 
You may need to implement custom tracing and monitoring: + + ```python + import logging + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenai + + # Configure basic logging + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger(__name__) + + ai = Genkit(plugins=[GoogleGenai()]) + + @ai.flow() + async def my_flow(input_text: str, ctx): + # Add manual logging for observability + logger.info(f"Processing input: {input_text}") + + try: + result = await ai.generate( + prompt=f"Process this: {input_text}", + ) + logger.info("Generation completed successfully") + return result.text + except Exception as e: + logger.error(f"Generation failed: {e}") + raise + ``` + + For more advanced observability, consider integrating with external monitoring tools or using framework-specific monitoring solutions. + + + +## Logging + + + + Genkit provides a centralized logging system that you can configure using the logging module. One advantage of using the Genkit-provided logger is that it automatically exports logs to Genkit Monitoring when the Firebase Telemetry plugin is enabled. + + ```typescript + import { logger } from 'genkit/logging'; + + // Set the desired log level + logger.setLogLevel('debug'); + + // Use the logger in your flows + const myFlow = ai.defineFlow({ + name: 'myFlow', + inputSchema: z.string(), + outputSchema: z.string(), + }, async (input) => { + logger.info('Starting flow execution', { input }); + + try { + const result = await ai.generate({ + prompt: `Process this: ${input}`, + }); + + logger.info('Flow completed successfully', { + inputLength: input.length, + outputLength: result.text.length + }); + + return result.text; + } catch (error) { + logger.error('Flow execution failed', { error, input }); + throw error; + } + }); + ``` + + Available log levels: `debug`, `info`, `warn`, `error` + + + In Go, you can use the standard logging package or structured logging libraries: + + ```go + import ( + "log" + "context" + "github.com/firebase/genkit/go/genkit" + ) + + func myFlow(ctx context.Context, input string) (string, error) { + log.Printf("Starting flow execution with input: %s", input) + + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Process this: " + input), + ) + if err != nil { + log.Printf("Flow execution failed: %v", err) + return "", err + } + + log.Printf("Flow completed successfully, output length: %d", len(resp.Text())) + return resp.Text(), nil + } + ``` + + For structured logging, consider using libraries like `logrus` or `zap`: + + ```go + import ( + "github.com/sirupsen/logrus" + ) + + func myFlowWithStructuredLogging(ctx context.Context, input string) (string, error) { + logrus.WithFields(logrus.Fields{ + "input_length": len(input), + "flow_name": "myFlow", + }).Info("Starting flow execution") + + // ... 
flow logic + + logrus.WithFields(logrus.Fields{ + "output_length": len(result), + "success": true, + }).Info("Flow completed") + + return result, nil + } + ``` + + + Use Python's standard logging module or integrate with your preferred logging framework: + + ```python + import logging + import structlog + + # Standard logging + logger = logging.getLogger(__name__) + + @ai.flow() + async def my_flow(input_text: str, ctx): + logger.info("Starting flow execution", extra={ + "input_length": len(input_text), + "flow_name": "my_flow" + }) + + try: + result = await ai.generate( + prompt=f"Process this: {input_text}", + ) + + logger.info("Flow completed successfully", extra={ + "input_length": len(input_text), + "output_length": len(result.text) + }) + + return result.text + except Exception as e: + logger.error("Flow execution failed", extra={ + "error": str(e), + "input_length": len(input_text) + }) + raise + + # Or use structured logging with structlog + struct_logger = structlog.get_logger() + + @ai.flow() + async def my_structured_flow(input_text: str, ctx): + struct_logger.info("flow_started", + input_length=len(input_text), + flow_name="my_structured_flow") + # ... flow logic + ``` + + + +## Developer UI integration + + + + The Genkit Developer UI provides detailed trace viewing and debugging capabilities. When you run your application with `genkit start`, you can: + + - View detailed traces of flow executions + - Inspect input/output at each step + - Analyze performance metrics + - Debug errors with full stack traces + - Monitor token usage and costs + + ```bash + # Start your app with the Developer UI + genkit start -- npm run dev + + # The Developer UI will be available at http://localhost:4000 + ``` + + All traces are automatically captured and displayed in the UI without additional configuration. + + + The Developer UI works the same way for Go applications. When you run your application in development mode, traces are automatically captured: + + ```bash + # Start your Go app with the Developer UI + genkit start -- go run main.go + + # The Developer UI will be available at http://localhost:4000 + ``` + + You can inspect: + - Flow execution traces + - Generation requests and responses + - Tool call details + - Performance metrics + - Error information + + + Python applications can also use the Developer UI, though with potentially limited trace details: + + ```bash + # Start your Python app with the Developer UI + genkit start -- python main.py + + # The Developer UI will be available at http://localhost:4000 + ``` + + The level of trace detail may vary depending on the Python implementation and available instrumentation. + + + +## Production monitoring + + + + For production environments, Genkit integrates with Firebase Genkit Monitoring and other observability platforms: + + ### Firebase Genkit Monitoring + + The [Genkit Monitoring](https://console.firebase.google.com/project/_/genai_monitoring) dashboard helps you understand the overall health of your Genkit features. It is useful for debugging stability and content issues that may indicate problems with your LLM prompts and/or Genkit Flows. 
+ + ```typescript + import { genkit } from 'genkit'; + import { firebase } from '@genkit-ai/firebase'; + + const ai = genkit({ + plugins: [ + firebase({ + // Enables telemetry export to Firebase + telemetry: true, + }), + ], + }); + ``` + + ### Custom observability platforms + + You can export OpenTelemetry data to other platforms: + + ```typescript + import { genkit } from 'genkit'; + import { opentelemetry } from '@genkit-ai/opentelemetry'; + + const ai = genkit({ + plugins: [ + opentelemetry({ + endpoint: 'https://your-otel-collector.com', + headers: { + 'Authorization': 'Bearer your-token', + }, + }), + ], + }); + ``` + + + For production monitoring, use the Google Cloud plugin to export telemetry to Cloud's operations suite: + + ```go + import ( + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{ + // Configure telemetry export + ProjectID: "your-project-id", + }), + ) + if err != nil { + panic(err) + } + + // Your flows will now export telemetry to Google Cloud + } + ``` + + This provides: + - Distributed tracing in Cloud Trace + - Metrics in Cloud Monitoring + - Logs in Cloud Logging + - Error reporting in Cloud Error Reporting + + + For production monitoring in Python, you'll typically need to integrate with external monitoring solutions: + + ```python + import logging + from opentelemetry import trace + from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter + from opentelemetry.sdk.trace import TracerProvider + from opentelemetry.sdk.trace.export import BatchSpanProcessor + + # Configure OpenTelemetry for Google Cloud + trace.set_tracer_provider(TracerProvider()) + tracer = trace.get_tracer(__name__) + + cloud_trace_exporter = CloudTraceSpanExporter() + span_processor = BatchSpanProcessor(cloud_trace_exporter) + trace.get_tracer_provider().add_span_processor(span_processor) + + @ai.flow() + async def monitored_flow(input_text: str, ctx): + with tracer.start_as_current_span("my_flow") as span: + span.set_attribute("input.length", len(input_text)) + + try: + result = await ai.generate( + prompt=f"Process this: {input_text}", + ) + span.set_attribute("output.length", len(result.text)) + return result.text + except Exception as e: + span.record_exception(e) + span.set_status(trace.Status(trace.StatusCode.ERROR)) + raise + ``` + + + +## Best practices + +### Development monitoring + + + + - Use the Developer UI for real-time debugging + - Set appropriate log levels for development vs production + - Monitor token usage and costs during development + - Use trace inspection to optimize flow performance + - Test error scenarios and review error traces + + + - Leverage the Developer UI for flow debugging + - Use structured logging for better searchability + - Monitor goroutine usage in concurrent flows + - Profile memory usage for long-running applications + - Test error handling and recovery scenarios + + + - Implement comprehensive logging throughout your flows + - Use async-compatible logging libraries + - Monitor memory usage and garbage collection + - Test error scenarios thoroughly + - Consider using APM tools for detailed insights + + + +### Production monitoring + + + + - Enable Firebase Genkit Monitoring for comprehensive insights + - Set up alerts for error rates and latency thresholds + - Monitor token usage and costs + - Track flow success rates and performance metrics + - Implement custom metrics for business-specific KPIs + + + - 
Use Google Cloud Operations Suite for full observability + - Set up monitoring dashboards for key metrics + - Configure alerting for error rates and latency + - Monitor resource usage and scaling metrics + - Implement health checks and readiness probes + + + - Integrate with your organization's monitoring stack + - Set up comprehensive error tracking + - Monitor application performance and resource usage + - Implement custom metrics collection + - Use distributed tracing for complex workflows + + + +### Security and privacy + + + + - Avoid logging sensitive data in traces + - Use log level filtering to control information exposure + - Implement proper access controls for monitoring dashboards + - Consider data retention policies for traces and logs + - Sanitize user inputs in log messages + + + - Implement log sanitization for sensitive data + - Use structured logging to control field exposure + - Configure proper IAM for monitoring resources + - Implement trace sampling for high-volume applications + - Consider data residency requirements + + + - Implement data sanitization in logging + - Use environment-specific log levels + - Secure monitoring endpoints and dashboards + - Consider GDPR and privacy requirements + - Implement proper error handling to avoid data leaks + + + +## Next steps + +- Learn about [developer tools](/unified-docs/developer-tools) for local development and debugging +- Explore [evaluation](/unified-docs/evaluation) to measure and improve your application quality +- See [creating flows](/unified-docs/creating-flows) to build observable AI workflows +- Check out [generating content](/unified-docs/generating-content) for understanding traced generation calls diff --git a/src/content/docs/unified-docs/plugin-authoring/models.mdx b/src/content/docs/unified-docs/plugin-authoring/models.mdx new file mode 100644 index 00000000..e39e7432 --- /dev/null +++ b/src/content/docs/unified-docs/plugin-authoring/models.mdx @@ -0,0 +1,1120 @@ +--- +title: Writing Model Plugins +description: Learn how to create Genkit model plugins across JavaScript, Go, and Python to integrate new generative AI models with comprehensive examples and best practices. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Model plugins add generative AI models to the Genkit registry. A model represents any generative model capable of receiving a prompt as input and generating text, media, or data as output. This guide covers creating model plugins across all supported languages. + +## Model Plugin Architecture + +A model plugin consists of three main components: + +1. **Metadata**: Declares the model's capabilities (multiturn, media, tools, etc.) +2. **Configuration Schema**: Defines model-specific parameters and options +3. 
**Generation Function**: Transforms requests/responses between Genkit and the model API

## Basic Model Plugin



    ```ts
    import { Genkit, z } from 'genkit';
    import { GenkitPlugin, genkitPlugin } from 'genkit/plugin';
    import { GenerationCommonConfigSchema } from 'genkit/model';

    // Define model configuration schema
    const MyModelConfigSchema = GenerationCommonConfigSchema.extend({
      customParam: z.string().optional(),
      temperature: z.number().min(0).max(2).default(0.7),
      maxTokens: z.number().positive().default(1000),
    });

    type MyModelConfig = z.infer<typeof MyModelConfigSchema>;

    interface MyPluginOptions {
      apiKey?: string;
      baseURL?: string;
    }

    export function myModelPlugin(options?: MyPluginOptions): GenkitPlugin {
      return genkitPlugin('myModel', async (ai: Genkit) => {
        const apiKey = options?.apiKey || process.env.MY_MODEL_API_KEY;
        const baseURL = options?.baseURL || 'https://api.mymodel.com';

        if (!apiKey) {
          throw new Error('API key required');
        }

        // Create API client
        const client = new MyModelAPIClient({ apiKey, baseURL });

        // Define the model
        ai.defineModel({
          name: 'myModel/text-generator',
          label: 'My Custom Text Generator',
          versions: ['v1', 'latest'],
          supports: {
            multiturn: true,
            media: false,
            tools: true,
            systemRole: true,
            output: ['text', 'json'],
          },
          configSchema: MyModelConfigSchema,
        }, async (request) => {
          // Transform Genkit request to API format
          const apiRequest = await transformRequest(request, client);

          // Call the model API
          const apiResponse = await client.generate(apiRequest);

          // Transform API response to Genkit format
          return transformResponse(apiResponse);
        });
      });
    }

    // Request transformation
    async function transformRequest(request: any, client: any) {
      const config = request.config as MyModelConfig;

      return {
        messages: request.messages.map((msg: any) => ({
          role: msg.role,
          content: msg.content.map((part: any) => part.text).join(''),
        })),
        temperature: config.temperature,
        max_tokens: config.maxTokens,
        custom_param: config.customParam,
      };
    }

    // Response transformation
    function transformResponse(apiResponse: any) {
      return {
        candidates: [{
          message: {
            role: 'model',
            content: [{ text: apiResponse.text }],
          },
          finishReason: apiResponse.finish_reason || 'stop',
        }],
        usage: {
          inputTokens: apiResponse.usage?.prompt_tokens || 0,
          outputTokens: apiResponse.usage?.completion_tokens || 0,
          totalTokens: apiResponse.usage?.total_tokens || 0,
        },
      };
    }
    ```


    ```go
    package mymodelplugin

    import (
      "context"
      "fmt"
      "os"

      "github.com/firebase/genkit/go/ai"
      "github.com/firebase/genkit/go/genkit"
    )

    const ProviderID = "myModel"

    // MyModelConfig defines configuration options
    type MyModelConfig struct {
      ai.GenerationCommonConfig
      CustomParam string  `json:"customParam,omitempty"`
      Temperature float64 `json:"temperature,omitempty"`
      MaxTokens   int     `json:"maxTokens,omitempty"`
    }

    // MyModelPlugin implements the genkit.Plugin interface
    type MyModelPlugin struct {
      APIKey  string
      BaseURL string
    }

    func (p *MyModelPlugin) Name() string {
      return ProviderID
    }

    func (p *MyModelPlugin) Init(ctx context.Context, g *genkit.Genkit) error {
      // Set defaults from environment
      if p.APIKey == "" {
        p.APIKey = os.Getenv("MY_MODEL_API_KEY")
      }
      if p.BaseURL == "" {
        p.BaseURL = "https://api.mymodel.com"
      }

      if p.APIKey == "" {
        return fmt.Errorf("API key required")
      }

      // Create API client
      client
:= NewMyModelAPIClient(p.APIKey, p.BaseURL) + + // Define the model + err := g.DefineModel(ProviderID, "text-generator", + &ai.ModelInfo{ + Label: "My Custom Text Generator", + Supports: &ai.ModelSupports{ + Multiturn: true, + Media: false, + Tools: true, + SystemRole: true, + }, + Versions: []string{"v1", "latest"}, + }, + func(ctx context.Context, req *ai.ModelRequest, cb ai.ModelStreamCallback) (*ai.ModelResponse, error) { + // Parse configuration + var config MyModelConfig + if req.Config != nil { + if typedConfig, ok := req.Config.(*MyModelConfig); ok { + config = *typedConfig + } + } + + // Set defaults + if config.Temperature == 0 { + config.Temperature = 0.7 + } + if config.MaxTokens == 0 { + config.MaxTokens = 1000 + } + + // Transform request + apiRequest, err := transformRequest(req, config) + if err != nil { + return nil, fmt.Errorf("failed to transform request: %w", err) + } + + // Call API + apiResponse, err := client.Generate(ctx, apiRequest) + if err != nil { + return nil, fmt.Errorf("API call failed: %w", err) + } + + // Transform response + return transformResponse(apiResponse) + }, + ) + + return err + } + + // API client interface + type MyModelAPIClient struct { + APIKey string + BaseURL string + } + + func NewMyModelAPIClient(apiKey, baseURL string) *MyModelAPIClient { + return &MyModelAPIClient{ + APIKey: apiKey, + BaseURL: baseURL, + } + } + + func (c *MyModelAPIClient) Generate(ctx context.Context, req *APIRequest) (*APIResponse, error) { + // Implementation of API call + // This would make actual HTTP requests to your model API + return &APIResponse{ + Text: "Generated response", + FinishReason: "stop", + Usage: &Usage{ + PromptTokens: 10, + CompletionTokens: 20, + TotalTokens: 30, + }, + }, nil + } + + // API request/response types + type APIRequest struct { + Messages []Message `json:"messages"` + Temperature float64 `json:"temperature"` + MaxTokens int `json:"max_tokens"` + CustomParam string `json:"custom_param,omitempty"` + } + + type APIResponse struct { + Text string `json:"text"` + FinishReason string `json:"finish_reason"` + Usage *Usage `json:"usage,omitempty"` + } + + type Message struct { + Role string `json:"role"` + Content string `json:"content"` + } + + type Usage struct { + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + } + + // Transform Genkit request to API format + func transformRequest(req *ai.ModelRequest, config MyModelConfig) (*APIRequest, error) { + var messages []Message + + for _, msg := range req.Messages { + content := "" + for _, part := range msg.Content { + if part.Text != "" { + content += part.Text + } + } + + messages = append(messages, Message{ + Role: string(msg.Role), + Content: content, + }) + } + + return &APIRequest{ + Messages: messages, + Temperature: config.Temperature, + MaxTokens: config.MaxTokens, + CustomParam: config.CustomParam, + }, nil + } + + // Transform API response to Genkit format + func transformResponse(apiResp *APIResponse) (*ai.ModelResponse, error) { + finishReason := ai.FinishReasonStop + if apiResp.FinishReason == "length" { + finishReason = ai.FinishReasonLength + } + + response := &ai.ModelResponse{ + Candidates: []*ai.Candidate{ + { + Message: &ai.Message{ + Content: []*ai.Part{ai.NewTextPart(apiResp.Text)}, + Role: ai.RoleModel, + }, + FinishReason: finishReason, + }, + }, + } + + if apiResp.Usage != nil { + response.Usage = &ai.Usage{ + InputTokens: apiResp.Usage.PromptTokens, + OutputTokens: 
apiResp.Usage.CompletionTokens, + TotalTokens: apiResp.Usage.TotalTokens, + } + } + + return response, nil + } + + // Helper functions for users + func Model(g *genkit.Genkit, name string) *ai.Model { + return genkit.LookupModel(g, ProviderID, name) + } + + func ModelRef(name string, config *MyModelConfig) *ai.ModelRef { + return ai.NewModelRef(fmt.Sprintf("%s/%s", ProviderID, name), config) + } + ``` + + + ```python + import os + import asyncio + from typing import Dict, Any, List, Optional, AsyncGenerator + from dataclasses import dataclass + from genkit.ai import Genkit + from genkit.plugins.base import Plugin + + @dataclass + class MyModelConfig: + """Configuration for MyModel""" + custom_param: Optional[str] = None + temperature: float = 0.7 + max_tokens: int = 1000 + top_p: float = 1.0 + frequency_penalty: float = 0.0 + + class MyModelPlugin(Plugin): + def __init__( + self, + api_key: Optional[str] = None, + base_url: Optional[str] = None, + **kwargs + ): + self.api_key = api_key or os.getenv("MY_MODEL_API_KEY") + self.base_url = base_url or "https://api.mymodel.com" + + if not self.api_key: + raise ValueError("API key required") + + super().__init__(provider_id="myModel", **kwargs) + + # Create API client + self.client = MyModelAPIClient(self.api_key, self.base_url) + + def initialize(self, ai: Genkit) -> None: + """Initialize the plugin and register models""" + + ai.define_model( + name=f"{self.provider_id}/text-generator", + config_schema={ + "type": "object", + "properties": { + "custom_param": {"type": "string"}, + "temperature": {"type": "number", "minimum": 0, "maximum": 2, "default": 0.7}, + "max_tokens": {"type": "integer", "minimum": 1, "default": 1000}, + "top_p": {"type": "number", "minimum": 0, "maximum": 1, "default": 1.0}, + "frequency_penalty": {"type": "number", "minimum": -2, "maximum": 2, "default": 0.0}, + }, + }, + supports={ + "multiturn": True, + "media": False, + "tools": True, + "system_role": True, + "output": ["text", "json"], + }, + generate_fn=self._generate_text, + stream_fn=self._stream_text, + ) + + async def _generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]: + """Generate text using the model""" + + # Parse configuration + config = MyModelConfig(**request.get("config", {})) + + # Transform request + api_request = self._transform_request(request, config) + + # Call API + api_response = await self.client.generate(api_request) + + # Transform response + return self._transform_response(api_response) + + async def _stream_text(self, request: Dict[str, Any]) -> AsyncGenerator[Dict[str, Any], None]: + """Stream text generation""" + + config = MyModelConfig(**request.get("config", {})) + api_request = self._transform_request(request, config) + + async for chunk in self.client.stream_generate(api_request): + yield self._transform_stream_chunk(chunk) + + def _transform_request(self, request: Dict[str, Any], config: MyModelConfig) -> Dict[str, Any]: + """Transform Genkit request to API format""" + + messages = [] + for msg in request.get("messages", []): + content = "" + for part in msg.get("content", []): + if "text" in part: + content += part["text"] + + messages.append({ + "role": msg["role"], + "content": content, + }) + + return { + "messages": messages, + "temperature": config.temperature, + "max_tokens": config.max_tokens, + "top_p": config.top_p, + "frequency_penalty": config.frequency_penalty, + "custom_param": config.custom_param, + } + + def _transform_response(self, api_response: Dict[str, Any]) -> Dict[str, Any]: + """Transform API 
response to Genkit format""" + + return { + "candidates": [{ + "message": { + "role": "model", + "content": [{"text": api_response["text"]}], + }, + "finish_reason": api_response.get("finish_reason", "stop"), + }], + "usage": { + "input_tokens": api_response.get("usage", {}).get("prompt_tokens", 0), + "output_tokens": api_response.get("usage", {}).get("completion_tokens", 0), + "total_tokens": api_response.get("usage", {}).get("total_tokens", 0), + }, + } + + def _transform_stream_chunk(self, chunk: Dict[str, Any]) -> Dict[str, Any]: + """Transform streaming chunk to Genkit format""" + + return { + "candidates": [{ + "message": { + "role": "model", + "content": [{"text": chunk.get("delta", "")}], + }, + "finish_reason": chunk.get("finish_reason"), + }], + } + + class MyModelAPIClient: + """API client for MyModel service""" + + def __init__(self, api_key: str, base_url: str): + self.api_key = api_key + self.base_url = base_url + + async def generate(self, request: Dict[str, Any]) -> Dict[str, Any]: + """Generate text (non-streaming)""" + # Implementation would make actual HTTP request + # This is a mock response + return { + "text": "Generated response text", + "finish_reason": "stop", + "usage": { + "prompt_tokens": 10, + "completion_tokens": 20, + "total_tokens": 30, + }, + } + + async def stream_generate(self, request: Dict[str, Any]) -> AsyncGenerator[Dict[str, Any], None]: + """Generate text (streaming)""" + # Implementation would make actual streaming HTTP request + # This is a mock streaming response + words = ["Generated", " response", " text"] + for word in words: + yield { + "delta": word, + "finish_reason": None, + } + + yield { + "delta": "", + "finish_reason": "stop", + } + + # Helper functions for users + def create_model_reference(name: str, config: Optional[MyModelConfig] = None) -> str: + """Create a model reference for use in generate calls""" + return f"myModel/{name}" + ``` + + + +## Advanced Model Features + +### Supporting Tool Calling + + + + ```ts + // In your model definition + ai.defineModel({ + name: 'myModel/tool-capable', + supports: { + tools: true, // Enable tool calling support + // ... other capabilities + }, + configSchema: MyModelConfigSchema, + }, async (request) => { + const config = request.config as MyModelConfig; + + // Check if tools are provided + const tools = request.tools || []; + + const apiRequest = { + messages: transformMessages(request.messages), + tools: tools.map(tool => ({ + type: 'function', + function: { + name: tool.name, + description: tool.description, + parameters: tool.inputSchema, + }, + })), + tool_choice: request.toolChoice || 'auto', + temperature: config.temperature, + }; + + const apiResponse = await client.generate(apiRequest); + + return { + candidates: [{ + message: { + role: 'model', + content: apiResponse.content ? 
[{ text: apiResponse.content }] : [], + toolCalls: apiResponse.tool_calls?.map(call => ({ + id: call.id, + name: call.function.name, + args: JSON.parse(call.function.arguments), + })) || [], + }, + finishReason: apiResponse.finish_reason || 'stop', + }], + usage: apiResponse.usage, + }; + }); + ``` + + + ```go + // In your model generation function + func(ctx context.Context, req *ai.ModelRequest, cb ai.ModelStreamCallback) (*ai.ModelResponse, error) { + // Handle tools if provided + var apiTools []APITool + for _, tool := range req.Tools { + apiTools = append(apiTools, APITool{ + Type: "function", + Function: APIFunction{ + Name: tool.Name, + Description: tool.Description, + Parameters: tool.InputSchema, + }, + }) + } + + apiRequest := &APIRequest{ + Messages: transformMessages(req.Messages), + Tools: apiTools, + ToolChoice: req.ToolChoice, + Temperature: config.Temperature, + } + + apiResponse, err := client.Generate(ctx, apiRequest) + if err != nil { + return nil, err + } + + // Transform tool calls + var toolCalls []*ai.ToolCall + for _, call := range apiResponse.ToolCalls { + var args map[string]interface{} + if err := json.Unmarshal([]byte(call.Function.Arguments), &args); err != nil { + return nil, fmt.Errorf("failed to parse tool arguments: %w", err) + } + + toolCalls = append(toolCalls, &ai.ToolCall{ + ID: call.ID, + Name: call.Function.Name, + Args: args, + }) + } + + return &ai.ModelResponse{ + Candidates: []*ai.Candidate{ + { + Message: &ai.Message{ + Content: transformContent(apiResponse.Content), + Role: ai.RoleModel, + ToolCalls: toolCalls, + }, + FinishReason: transformFinishReason(apiResponse.FinishReason), + }, + }, + Usage: transformUsage(apiResponse.Usage), + }, nil + } + ``` + + + ```python + async def _generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]: + """Generate text with tool calling support""" + + config = MyModelConfig(**request.get("config", {})) + + # Handle tools + tools = [] + for tool in request.get("tools", []): + tools.append({ + "type": "function", + "function": { + "name": tool["name"], + "description": tool["description"], + "parameters": tool["input_schema"], + }, + }) + + api_request = { + "messages": self._transform_messages(request.get("messages", [])), + "tools": tools, + "tool_choice": request.get("tool_choice", "auto"), + "temperature": config.temperature, + "max_tokens": config.max_tokens, + } + + api_response = await self.client.generate(api_request) + + # Transform tool calls + tool_calls = [] + for call in api_response.get("tool_calls", []): + tool_calls.append({ + "id": call["id"], + "name": call["function"]["name"], + "args": json.loads(call["function"]["arguments"]), + }) + + return { + "candidates": [{ + "message": { + "role": "model", + "content": [{"text": api_response.get("content", "")}] if api_response.get("content") else [], + "tool_calls": tool_calls, + }, + "finish_reason": api_response.get("finish_reason", "stop"), + }], + "usage": api_response.get("usage", {}), + } + ``` + + + +### Supporting Media Input + + + + ```ts + ai.defineModel({ + name: 'myModel/vision-model', + supports: { + media: true, // Enable media support + // ... 
other capabilities + }, + configSchema: MyModelConfigSchema, + }, async (request) => { + const messages = request.messages.map(msg => ({ + role: msg.role, + content: msg.content.map(part => { + if (part.text) { + return { type: 'text', text: part.text }; + } else if (part.media) { + return { + type: 'image_url', + image_url: { + url: part.media.url, + detail: 'auto', + }, + }; + } + return null; + }).filter(Boolean), + })); + + const apiResponse = await client.generate({ + messages, + temperature: request.config.temperature, + }); + + return transformResponse(apiResponse); + }); + ``` + + + ```go + // In your request transformation function + func transformMessages(messages []*ai.Message) []APIMessage { + var apiMessages []APIMessage + + for _, msg := range messages { + var content []APIContent + + for _, part := range msg.Content { + if part.Text != "" { + content = append(content, APIContent{ + Type: "text", + Text: part.Text, + }) + } else if part.Media != nil { + content = append(content, APIContent{ + Type: "image_url", + ImageURL: &APIImageURL{ + URL: part.Media.URL, + Detail: "auto", + }, + }) + } + } + + apiMessages = append(apiMessages, APIMessage{ + Role: string(msg.Role), + Content: content, + }) + } + + return apiMessages + } + + type APIContent struct { + Type string `json:"type"` + Text string `json:"text,omitempty"` + ImageURL *APIImageURL `json:"image_url,omitempty"` + } + + type APIImageURL struct { + URL string `json:"url"` + Detail string `json:"detail"` + } + ``` + + + ```python + def _transform_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Transform messages with media support""" + + api_messages = [] + for msg in messages: + content = [] + + for part in msg.get("content", []): + if "text" in part: + content.append({ + "type": "text", + "text": part["text"], + }) + elif "media" in part: + content.append({ + "type": "image_url", + "image_url": { + "url": part["media"]["url"], + "detail": "auto", + }, + }) + + api_messages.append({ + "role": msg["role"], + "content": content, + }) + + return api_messages + ``` + + + +## Best Practices + +### Error Handling + + + + ```ts + import { GenkitError } from 'genkit'; + + // In your generation function + try { + const apiResponse = await client.generate(apiRequest); + return transformResponse(apiResponse); + } catch (error) { + if (error.status === 429) { + throw new GenkitError({ + source: 'myModel', + status: 'RESOURCE_EXHAUSTED', + message: 'Rate limit exceeded', + }); + } else if (error.status === 401) { + throw new GenkitError({ + source: 'myModel', + status: 'UNAUTHENTICATED', + message: 'Invalid API key', + }); + } else { + throw new GenkitError({ + source: 'myModel', + status: 'INTERNAL', + message: `Model API error: ${error.message}`, + }); + } + } + ``` + + + ```go + import "google.golang.org/grpc/codes" + + // In your generation function + apiResponse, err := client.Generate(ctx, apiRequest) + if err != nil { + // Handle specific error types + if isRateLimitError(err) { + return nil, fmt.Errorf("rate limit exceeded: %w", err) + } else if isAuthError(err) { + return nil, fmt.Errorf("authentication failed: %w", err) + } else { + return nil, fmt.Errorf("model API error: %w", err) + } + } + + func isRateLimitError(err error) bool { + // Check if error indicates rate limiting + return strings.Contains(err.Error(), "rate limit") || + strings.Contains(err.Error(), "429") + } + + func isAuthError(err error) bool { + // Check if error indicates authentication failure + return 
strings.Contains(err.Error(), "unauthorized") || + strings.Contains(err.Error(), "401") + } + ``` + + + ```python + import httpx + from genkit.exceptions import GenkitError + + async def _generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]: + try: + api_response = await self.client.generate(api_request) + return self._transform_response(api_response) + except httpx.HTTPStatusError as e: + if e.response.status_code == 429: + raise GenkitError( + source="myModel", + status="RESOURCE_EXHAUSTED", + message="Rate limit exceeded" + ) + elif e.response.status_code == 401: + raise GenkitError( + source="myModel", + status="UNAUTHENTICATED", + message="Invalid API key" + ) + else: + raise GenkitError( + source="myModel", + status="INTERNAL", + message=f"Model API error: {e.response.text}" + ) + except Exception as e: + raise GenkitError( + source="myModel", + status="INTERNAL", + message=f"Unexpected error: {str(e)}" + ) + ``` + + + +### Configuration Validation + + + + ```ts + const MyModelConfigSchema = GenerationCommonConfigSchema.extend({ + temperature: z.number().min(0).max(2).default(0.7), + maxTokens: z.number().positive().max(4096).default(1000), + topP: z.number().min(0).max(1).default(1.0), + customParam: z.string().optional(), + }); + + // In your generation function + const config = MyModelConfigSchema.parse(request.config || {}); + ``` + + + ```go + func validateConfig(config *MyModelConfig) error { + if config.Temperature < 0 || config.Temperature > 2 { + return fmt.Errorf("temperature must be between 0 and 2") + } + if config.MaxTokens <= 0 || config.MaxTokens > 4096 { + return fmt.Errorf("maxTokens must be between 1 and 4096") + } + return nil + } + + // In your generation function + if err := validateConfig(&config); err != nil { + return nil, fmt.Errorf("invalid configuration: %w", err) + } + ``` + + + ```python + from pydantic import BaseModel, Field, validator + + class MyModelConfig(BaseModel): + temperature: float = Field(default=0.7, ge=0, le=2) + max_tokens: int = Field(default=1000, gt=0, le=4096) + top_p: float = Field(default=1.0, ge=0, le=1) + + @validator('temperature') + def validate_temperature(cls, v): + if not 0 <= v <= 2: + raise ValueError('temperature must be between 0 and 2') + return v + ``` + + + +## Testing Your Model Plugin + +### Unit Testing + + + + ```ts + import { describe, it, expect, beforeEach } from 'vitest'; + import { genkit } from 'genkit'; + import { myModelPlugin } from './my-model-plugin'; + + describe('MyModel Plugin', () => { + let ai: any; + + beforeEach(async () => { + ai = genkit({ + plugins: [myModelPlugin({ apiKey: 'test-key' })], + }); + }); + + it('should generate text', async () => { + const response = await ai.generate({ + model: 'myModel/text-generator', + prompt: 'Hello, world!', + config: { temperature: 0.5 }, + }); + + expect(response.text).toBeDefined(); + expect(response.text.length).toBeGreaterThan(0); + }); + + it('should handle tool calls', async () => { + const response = await ai.generate({ + model: 'myModel/tool-capable', + prompt: 'What is the weather like?', + tools: [{ + name: 'get_weather', + description: 'Get current weather', + inputSchema: { + type: 'object', + properties: { + location: { type: 'string' }, + }, + }, + }], + }); + + expect(response.toolCalls).toBeDefined(); + }); + }); + ``` + + + ```go + package mymodelplugin_test + + import ( + "context" + "testing" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/stretchr/testify/assert" + 
"github.com/stretchr/testify/require" + ) + + func TestMyModelPlugin(t *testing.T) { + ctx := context.Background() + + // Initialize Genkit with the plugin + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &MyModelPlugin{ + APIKey: "test-key", + }, + ), + ) + require.NoError(t, err) + + t.Run("should generate text", func(t *testing.T) { + model := genkit.LookupModel(g, "myModel", "text-generator") + require.NotNil(t, model) + + req := &ai.ModelRequest{ + Messages: []*ai.Message{ + { + Content: []*ai.Part{ai.NewTextPart("Hello, world!")}, + Role: ai.RoleUser, + }, + }, + Config: &MyModelConfig{ + Temperature: 0.5, + }, + } + + resp, err := model.Generate(ctx, req, nil) + require.NoError(t, err) + assert.NotEmpty(t, resp.Candidates) + assert.NotEmpty(t, resp.Candidates[0].Message.Content) + }) + } + ``` + + + ```python + import pytest + import asyncio + from genkit.ai import Genkit + from my_model_plugin import MyModelPlugin + + @pytest.fixture + async def ai(): + """Create a Genkit instance with the plugin for testing""" + return Genkit( + plugins=[ + MyModelPlugin(api_key="test-key"), + ], + ) + + @pytest.mark.asyncio + async def test_generate_text(ai): + """Test basic text generation""" + response = await ai.generate( + model="myModel/text-generator", + prompt="Hello, world!", + config={"temperature": 0.5}, + ) + + assert response["text"] + assert len(response["text"]) > 0 + + @pytest.mark.asyncio + async def test_tool_calling(ai): + """Test tool calling functionality""" + response = await ai.generate( + model="myModel/tool-capable", + prompt="What is the weather like?", + tools=[{ + "name": "get_weather", + "description": "Get current weather", + "input_schema": { + "type": "object", + "properties": { + "location": {"type": "string"}, + }, + }, + }], + ) + + assert "tool_calls" in response + ``` + + + +## Next Steps + +- Learn about [writing embedder plugins](/unified-docs/plugin-authoring/embedders) for text embedding models +- Explore [writing retriever plugins](/unified-docs/plugin-authoring/retrievers) for custom data sources +- See [telemetry plugins](/unified-docs/plugin-authoring/telemetry) for monitoring and observability +- Check out the [plugin authoring overview](/unified-docs/plugin-authoring/overview) for general plugin concepts diff --git a/src/content/docs/unified-docs/plugin-authoring/overview.mdx b/src/content/docs/unified-docs/plugin-authoring/overview.mdx new file mode 100644 index 00000000..5158fcb0 --- /dev/null +++ b/src/content/docs/unified-docs/plugin-authoring/overview.mdx @@ -0,0 +1,467 @@ +--- +title: Writing Genkit Plugins +description: Learn how to extend Genkit's capabilities by writing custom plugins across JavaScript, Go, and Python, covering plugin creation, models, retrievers, and publishing. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Genkit's capabilities are designed to be extended by plugins. Genkit plugins are configurable modules that can provide models, retrievers, indexers, trace stores, and more. You've already seen plugins in action just by using Genkit - every AI provider, vector database, and framework integration is implemented as a plugin. 
+ +## Plugin Architecture + +Plugins in Genkit follow a consistent architecture across all languages, providing a way to: + +- **Register new actions**: Models, embedders, retrievers, indexers, and tools +- **Configure services**: API keys, endpoints, and service-specific settings +- **Extend functionality**: Add new capabilities to the Genkit ecosystem +- **Maintain consistency**: Follow established patterns for reliability and discoverability + + + + In JavaScript, plugins are created using the `genkitPlugin` helper: + + ```ts + import { genkit } from 'genkit'; + import { vertexAI } from '@genkit-ai/vertexai'; + + const ai = genkit({ + plugins: [vertexAI({ projectId: 'my-project' })], + }); + ``` + + The Vertex AI plugin takes configuration and registers models, embedders, and more with the Genkit registry, which powers the local UI and serves as a lookup service for named actions at runtime. + + + In Go, plugins implement the `genkit.Plugin` interface: + + ```go + import ( + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googlegenai.GoogleAI{APIKey: "..."}, + &googlegenai.VertexAI{ + ProjectID: "my-project", + Location: "us-central1", + }, + ), + ) + ``` + + Plugins register resources with unique identifiers to prevent naming conflicts with other plugins. + + + In Python, plugins are classes that extend the base plugin functionality: + + ```python + from genkit.ai import Genkit + from genkit.plugins.vertexai import VertexAI + + ai = Genkit( + plugins=[ + VertexAI(project_id="my-project", location="us-central1"), + ], + ) + ``` + + Python plugins follow similar patterns to JavaScript and Go, providing consistent APIs across languages. + + + +## Creating a Plugin + +### Project Setup + + + + Create a new NPM package for your plugin: + + ```bash + mkdir genkitx-my-plugin + cd genkitx-my-plugin + npm init -y + npm install genkit + npm install --save-dev typescript + npx tsc --init + ``` + + Define and export your plugin using the `genkitPlugin` helper: + + ```ts + import { Genkit, z } from 'genkit'; + import { GenkitPlugin, genkitPlugin } from 'genkit/plugin'; + + interface MyPluginOptions { + apiKey?: string; + // Add any plugin configuration here + } + + export function myPlugin(options?: MyPluginOptions): GenkitPlugin { + return genkitPlugin( + 'myPlugin', + // Initializer function (required) + async (ai: Genkit) => { + const apiKey = options?.apiKey || process.env.MY_PLUGIN_API_KEY; + if (!apiKey) { + throw new Error('API key required'); + } + + // Register actions that are always available + ai.defineModel({ + name: 'myPlugin/my-model', + // ... model definition + }); + + ai.defineEmbedder({ + name: 'myPlugin/my-embedder', + // ... embedder definition + }); + }, + // Dynamic Action Resolver (optional) + async (ai: Genkit, actionType, actionName) => { + // Define actions on-demand + if (actionType === 'model' && actionName === 'dynamic-model') { + ai.defineModel({ + name: `myPlugin/${actionName}`, + // ... dynamic model definition + }); + } + }, + // List Actions function (optional) + async () => { + // Return metadata for all potential actions + const availableModels = await fetchAvailableModels(); + return availableModels.map(model => ({ + type: 'model', + name: `myPlugin/${model.id}`, + // ... 
other metadata + })); + } + ); + } + ``` + + + Create a Go package that implements the `genkit.Plugin` interface: + + ```go + package myplugin + + import ( + "context" + "os" + "github.com/firebase/genkit/go/genkit" + ) + + const ProviderID = "myplugin" + + // Plugin configuration struct + type MyPlugin struct { + APIKey string + // Other configuration options + } + + // Name returns the provider ID + func (p *MyPlugin) Name() string { + return ProviderID + } + + // Init initializes the plugin + func (p *MyPlugin) Init(ctx context.Context, g *genkit.Genkit) error { + // Set default values and validate configuration + if p.APIKey == "" { + p.APIKey = os.Getenv("MY_PLUGIN_API_KEY") + } + if p.APIKey == "" { + return fmt.Errorf("API key required") + } + + // Register models, embedders, etc. + err := g.DefineModel(ProviderID, "my-model", &ModelConfig{ + // ... model configuration + }, func(ctx context.Context, req *ai.GenerateRequest) (*ai.GenerateResponse, error) { + // ... model implementation + }) + if err != nil { + return fmt.Errorf("failed to define model: %w", err) + } + + return nil + } + ``` + + Usage: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &myplugin.MyPlugin{ + APIKey: "your-api-key", + }, + ), + ) + ``` + + + Create a Python package with a plugin class: + + ```python + import os + from typing import Optional, Dict, Any + from genkit.ai import Genkit + from genkit.plugins.base import Plugin + + class MyPlugin(Plugin): + def __init__(self, api_key: Optional[str] = None, **kwargs): + self.api_key = api_key or os.getenv("MY_PLUGIN_API_KEY") + if not self.api_key: + raise ValueError("API key required") + + super().__init__(provider_id="myplugin", **kwargs) + + def initialize(self, ai: Genkit) -> None: + """Initialize the plugin and register actions""" + + # Register a model + ai.define_model( + name=f"{self.provider_id}/my-model", + config_schema={ + "temperature": {"type": "number", "default": 0.7}, + "max_tokens": {"type": "integer", "default": 1000}, + }, + generate_fn=self._generate_text, + ) + + # Register an embedder + ai.define_embedder( + name=f"{self.provider_id}/my-embedder", + embed_fn=self._embed_text, + ) + + async def _generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]: + """Generate text using the model""" + # Implementation here + pass + + async def _embed_text(self, text: str) -> List[float]: + """Generate embeddings for text""" + # Implementation here + pass + ``` + + Usage: + + ```python + from genkit.ai import Genkit + from my_plugin import MyPlugin + + ai = Genkit( + plugins=[ + MyPlugin(api_key="your-api-key"), + ], + ) + ``` + + + +## Plugin Configuration Best Practices + +### Secure Configuration + +For any plugin options that require secret values (API keys, tokens), provide both configuration options and environment variable defaults: + + + + ```ts + interface MyPluginOptions { + apiKey?: string; + endpoint?: string; + timeout?: number; + } + + export function myPlugin(options?: MyPluginOptions): GenkitPlugin { + return genkitPlugin('myPlugin', async (ai: Genkit) => { + // Prioritize explicit options, fall back to environment variables + const apiKey = options?.apiKey || process.env.MY_PLUGIN_API_KEY; + const endpoint = options?.endpoint || process.env.MY_PLUGIN_ENDPOINT || 'https://api.default.com'; + + if (!apiKey) { + throw new GenkitError({ + source: 'my-plugin', + status: 'INVALID_ARGUMENT', + message: 'Must supply either `options.apiKey` or set `MY_PLUGIN_API_KEY` environment variable.', + }); + } + + // Use 
configuration to set up the plugin + const client = new MyAPIClient({ apiKey, endpoint }); + + // Register actions using the configured client + ai.defineModel(/* ... */); + }); + } + ``` + + + ```go + type MyPlugin struct { + APIKey string + Endpoint string + Timeout time.Duration + } + + func (p *MyPlugin) Init(ctx context.Context, g *genkit.Genkit) error { + // Set defaults from environment variables + if p.APIKey == "" { + p.APIKey = os.Getenv("MY_PLUGIN_API_KEY") + } + if p.Endpoint == "" { + p.Endpoint = os.Getenv("MY_PLUGIN_ENDPOINT") + if p.Endpoint == "" { + p.Endpoint = "https://api.default.com" + } + } + if p.Timeout == 0 { + p.Timeout = 30 * time.Second + } + + if p.APIKey == "" { + return fmt.Errorf("API key required: set APIKey field or MY_PLUGIN_API_KEY environment variable") + } + + // Create client with configuration + client := NewMyAPIClient(p.APIKey, p.Endpoint, p.Timeout) + + // Register actions + return g.DefineModel(/* ... */) + } + ``` + + + ```python + class MyPlugin(Plugin): + def __init__( + self, + api_key: Optional[str] = None, + endpoint: Optional[str] = None, + timeout: int = 30, + **kwargs + ): + # Prioritize explicit options, fall back to environment variables + self.api_key = api_key or os.getenv("MY_PLUGIN_API_KEY") + self.endpoint = endpoint or os.getenv("MY_PLUGIN_ENDPOINT", "https://api.default.com") + self.timeout = timeout + + if not self.api_key: + raise ValueError( + "API key required: provide api_key parameter or set MY_PLUGIN_API_KEY environment variable" + ) + + super().__init__(provider_id="myplugin", **kwargs) + + # Create client with configuration + self.client = MyAPIClient( + api_key=self.api_key, + endpoint=self.endpoint, + timeout=self.timeout + ) + ``` + + + +## Plugin Types + +Genkit supports several types of plugins, each serving different purposes: + +### Model Plugins +Provide generative AI models that can receive prompts and generate text, media, or data. + +### Embedder Plugins +Provide text embedding models that convert text into vector representations. + +### Retriever Plugins +Provide document retrieval capabilities for RAG (Retrieval-Augmented Generation) systems. + +### Indexer Plugins +Provide document indexing capabilities for storing and organizing documents. + +### Tool Plugins +Provide function calling capabilities that models can use to interact with external systems. + +### Telemetry Plugins +Configure observability and monitoring for Genkit applications. 
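All of these categories follow the same registration pattern shown above. As a quick illustration, here is a minimal sketch of a retriever plugin in JavaScript — the plugin name, the in-memory corpus, and the keyword-matching logic are hypothetical stand-ins for a real data source:

```ts
import { Genkit } from 'genkit';
import { Document } from 'genkit/retriever';
import { GenkitPlugin, genkitPlugin } from 'genkit/plugin';

// Hypothetical in-memory corpus standing in for a real database.
const corpus = [
  Document.fromText('Genkit plugins register actions with the registry.'),
  Document.fromText('Retrievers return documents relevant to a query.'),
];

export function memoryRetrieverPlugin(): GenkitPlugin {
  return genkitPlugin('memoryRetriever', async (ai: Genkit) => {
    ai.defineRetriever({ name: 'memoryRetriever/docs' }, async (query) => {
      // Naive keyword match; a real plugin would query a vector store.
      const terms = query.text.toLowerCase().split(/\s+/);
      const documents = corpus.filter((doc) =>
        terms.some((term) => doc.text.toLowerCase().includes(term)),
      );
      return { documents };
    });
  });
}
```

The same `genkitPlugin` wrapper works for any mix of the action types above; a single plugin can register models, embedders, and retrievers side by side.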
+ +## Publishing Plugins + +### Package Naming and Keywords + + + + Use the `genkitx-{name}` naming convention and include relevant keywords in your `package.json`: + + ```json + { + "name": "genkitx-my-plugin", + "keywords": [ + "genkit-plugin", + "genkit-model", + "genkit-embedder", + "genkit-retriever" + ], + "description": "My custom Genkit plugin", + "main": "dist/index.js", + "types": "dist/index.d.ts" + } + ``` + + Available keywords: + - `genkit-plugin`: Always include this + - `genkit-model`: If your plugin defines models + - `genkit-embedder`: If your plugin defines embedders + - `genkit-retriever`: If your plugin defines retrievers + - `genkit-indexer`: If your plugin defines indexers + - `genkit-telemetry`: If your plugin provides telemetry + - `genkit-deploy`: If your plugin includes deployment helpers + - `genkit-flow`: If your plugin enhances flows + + + Use descriptive package names that include "genkit" for discoverability: + + ``` + github.com/yourorg/genkit-plugins/servicename + github.com/yourorg/your-repo/genkit/servicename + ``` + + Include a comprehensive README.md with: + - Installation instructions + - Configuration options + - Usage examples + - API documentation + + + Use the `genkit-{name}` naming convention and include relevant classifiers in your `setup.py` or `pyproject.toml`: + + ```toml + [project] + name = "genkit-my-plugin" + description = "My custom Genkit plugin" + keywords = ["genkit", "plugin", "ai", "model"] + classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + ] + ``` + + + +## Next Steps + +- Learn about [writing model plugins](/unified-docs/plugin-authoring/models) to add new AI models +- Explore [writing retriever plugins](/unified-docs/plugin-authoring/retrievers) for custom data sources +- See [writing embedder plugins](/unified-docs/plugin-authoring/embedders) for custom embedding models +- Check out [telemetry plugins](/unified-docs/plugin-authoring/telemetry) for monitoring and observability diff --git a/src/content/docs/unified-docs/plugins/anthropic.mdx b/src/content/docs/unified-docs/plugins/anthropic.mdx new file mode 100644 index 00000000..10f45dea --- /dev/null +++ b/src/content/docs/unified-docs/plugins/anthropic.mdx @@ -0,0 +1,817 @@ +--- +title: Anthropic (Claude) Plugin +description: Learn how to use Anthropic's Claude models with Genkit across JavaScript, Go, and Python for advanced reasoning, analysis, and conversational AI. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Anthropic's Claude models are known for their advanced reasoning capabilities, safety features, and nuanced understanding of complex topics. Claude excels at analysis, writing, math, coding, and thoughtful conversation while maintaining helpful, harmless, and honest interactions. + +## Installation and Setup + + + + Claude models are available in JavaScript through Vertex AI Model Garden. You'll need access to Claude models in your Google Cloud project. 

    Install the Vertex AI plugin:

    ```bash
    npm install @genkit-ai/vertexai
    ```

    Configure the plugin with Claude models:

    ```ts
    import { genkit } from 'genkit';
    import { vertexAI } from '@genkit-ai/vertexai';

    const ai = genkit({
      plugins: [
        vertexAI({
          projectId: 'your-project-id',
          location: 'us-central1',
          models: ['claude-3-haiku', 'claude-3-sonnet', 'claude-3-opus'],
        }),
      ],
    });
    ```

    ### Prerequisites

    1. **Google Cloud Project**: Set up a Google Cloud project with Vertex AI enabled
    2. **Claude Model Access**: Request access to Claude models in [Vertex AI Model Garden](https://console.cloud.google.com/vertex-ai/publishers/anthropic/model-garden)
    3. **Authentication**: Configure Google Cloud authentication

    ```bash
    # Set up authentication
    gcloud auth application-default login
    export GOOGLE_CLOUD_PROJECT=your-project-id
    ```

    ### Available Models via Vertex AI

    - **claude-3-haiku**: Fast and efficient for simple tasks
    - **claude-3-sonnet**: Balanced performance and capability
    - **claude-3-opus**: Most capable for complex reasoning


    Claude models are available in Go through the OpenAI-compatible Anthropic plugin.

    Install the required packages:

    ```bash
    go get github.com/firebase/genkit/go/plugins/compat_oai/anthropic
    go get github.com/openai/openai-go/option
    ```

    Configure the Anthropic plugin:

    ```go
    package main

    import (
      "context"
      "log"
      "os"

      "github.com/firebase/genkit/go/genkit"
      "github.com/firebase/genkit/go/plugins/compat_oai/anthropic"
      "github.com/openai/openai-go/option"
    )

    func main() {
      ctx := context.Background()
      g, err := genkit.Init(ctx,
        genkit.WithPlugins(&anthropic.Anthropic{
          Opts: []option.RequestOption{
            option.WithAPIKey(os.Getenv("ANTHROPIC_API_KEY")),
          },
        }),
      )
      if err != nil {
        log.Fatal(err)
      }
    }
    ```

    ### API Key Configuration

    ```bash
    export ANTHROPIC_API_KEY=your_anthropic_api_key
    ```

    Get your API key from [Anthropic Console](https://console.anthropic.com/).

    ### Available Models

    - **claude-3-7-sonnet-20250219**: Latest Claude 3.7 Sonnet with advanced capabilities
    - **claude-3-5-haiku-20241022**: Fast and efficient Claude 3.5 Haiku
    - **claude-3-5-sonnet-20240620**: Balanced Claude 3.5 Sonnet
    - **claude-3-opus-20240229**: Most capable Claude 3 model
    - **claude-3-haiku-20240307**: Fastest Claude 3 model


    Claude models are currently not directly supported in Python Genkit. However, you can access Claude through:

    1. **Vertex AI Model Garden** (if available in your region)
    2. **Custom OpenAI-compatible wrapper** using the Anthropic API

    For Vertex AI access (if available):

    ```python
    from genkit.ai import Genkit
    from genkit.plugins.vertex_ai import VertexAI

    ai = Genkit(
        plugins=[
            VertexAI(
                project_id="your-project-id",
                location="us-central1",
                models=["claude-3-haiku", "claude-3-sonnet", "claude-3-opus"],
            ),
        ],
    )
    ```

    For direct Anthropic API access, you would need to create a custom plugin or use the Anthropic Python SDK directly alongside Genkit.
+ + ### Environment Configuration + + ```bash + export GOOGLE_CLOUD_PROJECT=your-project-id + # or + export ANTHROPIC_API_KEY=your_anthropic_api_key + ``` + + + +## Basic Usage + +### Text Generation + + + + Use Claude models for text generation through Vertex AI: + + ```ts + import { genkit, z } from 'genkit'; + import { vertexAI } from '@genkit-ai/vertexai'; + + const ai = genkit({ + plugins: [ + vertexAI({ + projectId: 'your-project-id', + location: 'us-central1', + models: ['claude-3-sonnet'], + }), + ], + }); + + // Basic text generation + const response = await ai.generate({ + model: 'claude-3-sonnet', + prompt: 'Explain the concept of quantum entanglement in simple terms.', + }); + + console.log(response.text); + + // Flow with Claude + export const claudeAnalysisFlow = ai.defineFlow( + { + name: 'claudeAnalysisFlow', + inputSchema: z.object({ + text: z.string(), + analysisType: z.enum(['sentiment', 'summary', 'critique']), + }), + outputSchema: z.object({ analysis: z.string() }), + }, + async ({ text, analysisType }) => { + const prompts = { + sentiment: `Analyze the sentiment of this text: "${text}"`, + summary: `Provide a concise summary of this text: "${text}"`, + critique: `Provide a thoughtful critique of this text: "${text}"`, + }; + + const response = await ai.generate({ + model: 'claude-3-sonnet', + prompt: prompts[analysisType], + config: { + temperature: 0.3, + maxTokens: 500, + }, + }); + + return { analysis: response.text }; + }, + ); + ``` + + + Use Claude models with the Anthropic plugin: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/compat_oai/anthropic" + ) + + func main() { + ctx := context.Background() + + // Initialize Anthropic plugin + claude := &anthropic.Anthropic{ + Opts: []option.RequestOption{ + option.WithAPIKey(os.Getenv("ANTHROPIC_API_KEY")), + }, + } + g, err := genkit.Init(ctx, genkit.WithPlugins(claude)) + if err != nil { + log.Fatal(err) + } + + // Basic text generation + model := claude.Model(g, "claude-3-7-sonnet-20250219") + resp, err := genkit.Generate(ctx, g, + ai.WithModel(model), + ai.WithPrompt("Explain the concept of quantum entanglement in simple terms."), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(resp.Text()) + + // Advanced reasoning task + reasoningResp, err := genkit.Generate(ctx, g, + ai.WithModel(model), + ai.WithPrompt("Analyze the ethical implications of AI in healthcare decision-making."), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.3, + "max_tokens": 1000, + }), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(reasoningResp.Text()) + } + ``` + + + Use Claude models through available integrations: + + ```python + # If using Vertex AI Model Garden + from genkit.ai import Genkit + from genkit.plugins.vertex_ai import VertexAI + + ai = Genkit( + plugins=[ + VertexAI( + project_id="your-project-id", + location="us-central1", + models=["claude-3-sonnet"], + ), + ], + ) + + # Basic text generation + response = await ai.generate( + model="claude-3-sonnet", + prompt="Explain the concept of quantum entanglement in simple terms." 
+ ) + print(response.text) + + # Analysis task + async def analyze_text(text: str, analysis_type: str) -> str: + prompts = { + "sentiment": f"Analyze the sentiment of this text: \"{text}\"", + "summary": f"Provide a concise summary of this text: \"{text}\"", + "critique": f"Provide a thoughtful critique of this text: \"{text}\"", + } + + response = await ai.generate( + model="claude-3-sonnet", + prompt=prompts[analysis_type], + config={ + "temperature": 0.3, + "max_tokens": 500, + } + ) + return response.text + ``` + + + +## Advanced Features + +### Complex Reasoning + + + + Leverage Claude's reasoning capabilities: + + ```ts + // Complex analysis flow + export const complexAnalysisFlow = ai.defineFlow( + { + name: 'complexAnalysisFlow', + inputSchema: z.object({ + problem: z.string(), + context: z.string().optional(), + }), + outputSchema: z.object({ + analysis: z.string(), + reasoning: z.string(), + recommendations: z.array(z.string()), + }), + }, + async ({ problem, context }) => { + const prompt = context + ? `Given this context: ${context}\n\nAnalyze this problem step by step: ${problem}` + : `Analyze this problem step by step: ${problem}`; + + const response = await ai.generate({ + model: 'claude-3-opus', // Use most capable model for complex reasoning + prompt: `${prompt} + +Please provide: +1. A thorough analysis +2. Your reasoning process +3. Specific recommendations + +Format your response clearly with sections.`, + config: { + temperature: 0.2, // Lower temperature for analytical tasks + maxTokens: 2000, + }, + }); + + // Parse the structured response + const sections = response.text.split('\n\n'); + const analysis = sections[0] || ''; + const reasoning = sections[1] || ''; + const recommendations = sections.slice(2) + .filter(section => section.includes('-')) + .flatMap(section => + section.split('\n') + .filter(line => line.trim().startsWith('-')) + .map(line => line.replace(/^-\s*/, '').trim()) + ); + + return { analysis, reasoning, recommendations }; + }, + ); + + // Ethical reasoning flow + export const ethicalAnalysisFlow = ai.defineFlow( + { + name: 'ethicalAnalysisFlow', + inputSchema: z.object({ scenario: z.string() }), + outputSchema: z.object({ + ethicalConsiderations: z.array(z.string()), + stakeholders: z.array(z.string()), + recommendations: z.string(), + }), + }, + async ({ scenario }) => { + const response = await ai.generate({ + model: 'claude-3-sonnet', + prompt: `Analyze the ethical implications of this scenario: ${scenario} + +Please identify: +1. Key ethical considerations +2. Affected stakeholders +3. 
Recommended approach + +Be thorough and consider multiple perspectives.`, + config: { + temperature: 0.3, + maxTokens: 1500, + }, + }); + + // Extract structured information from response + const lines = response.text.split('\n').filter(line => line.trim()); + const ethicalConsiderations = lines + .filter(line => line.includes('ethical') || line.includes('moral')) + .slice(0, 5); + const stakeholders = lines + .filter(line => line.includes('stakeholder') || line.includes('affected')) + .slice(0, 5); + const recommendations = lines + .filter(line => line.includes('recommend') || line.includes('suggest')) + .join(' '); + + return { ethicalConsiderations, stakeholders, recommendations }; + }, + ); + ``` + + + Leverage Claude's reasoning capabilities: + + ```go + // Complex analysis function + func performComplexAnalysis(ctx context.Context, problem, context string) (map[string]interface{}, error) { + prompt := problem + if context != "" { + prompt = fmt.Sprintf("Given this context: %s\n\nAnalyze this problem step by step: %s", context, problem) + } + + fullPrompt := fmt.Sprintf(`%s + +Please provide: +1. A thorough analysis +2. Your reasoning process +3. Specific recommendations + +Format your response clearly with sections.`, prompt) + + model := claude.Model(g, "claude-3-opus-20240229") // Most capable model + resp, err := genkit.Generate(ctx, g, + ai.WithModel(model), + ai.WithPrompt(fullPrompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.2, + "max_tokens": 2000, + }), + ) + if err != nil { + return nil, fmt.Errorf("analysis failed: %w", err) + } + + // Parse structured response + sections := strings.Split(resp.Text(), "\n\n") + analysis := "" + reasoning := "" + var recommendations []string + + if len(sections) > 0 { + analysis = sections[0] + } + if len(sections) > 1 { + reasoning = sections[1] + } + if len(sections) > 2 { + for _, section := range sections[2:] { + lines := strings.Split(section, "\n") + for _, line := range lines { + if strings.HasPrefix(strings.TrimSpace(line), "-") { + recommendations = append(recommendations, strings.TrimSpace(strings.TrimPrefix(line, "-"))) + } + } + } + } + + return map[string]interface{}{ + "analysis": analysis, + "reasoning": reasoning, + "recommendations": recommendations, + }, nil + } + + // Ethical analysis function + func performEthicalAnalysis(ctx context.Context, scenario string) (map[string]interface{}, error) { + prompt := fmt.Sprintf(`Analyze the ethical implications of this scenario: %s + +Please identify: +1. Key ethical considerations +2. Affected stakeholders +3. 
Recommended approach + +Be thorough and consider multiple perspectives.`, scenario) + + model := claude.Model(g, "claude-3-7-sonnet-20250219") + resp, err := genkit.Generate(ctx, g, + ai.WithModel(model), + ai.WithPrompt(prompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.3, + "max_tokens": 1500, + }), + ) + if err != nil { + return nil, fmt.Errorf("ethical analysis failed: %w", err) + } + + // Extract structured information + lines := strings.Split(resp.Text(), "\n") + var ethicalConsiderations, stakeholders []string + var recommendations string + + for _, line := range lines { + line = strings.TrimSpace(line) + if strings.Contains(strings.ToLower(line), "ethical") || strings.Contains(strings.ToLower(line), "moral") { + ethicalConsiderations = append(ethicalConsiderations, line) + } + if strings.Contains(strings.ToLower(line), "stakeholder") || strings.Contains(strings.ToLower(line), "affected") { + stakeholders = append(stakeholders, line) + } + if strings.Contains(strings.ToLower(line), "recommend") || strings.Contains(strings.ToLower(line), "suggest") { + recommendations += line + " " + } + } + + return map[string]interface{}{ + "ethicalConsiderations": ethicalConsiderations[:min(len(ethicalConsiderations), 5)], + "stakeholders": stakeholders[:min(len(stakeholders), 5)], + "recommendations": strings.TrimSpace(recommendations), + }, nil + } + ``` + + + Leverage Claude's reasoning capabilities: + + ```python + from typing import List, Dict, Any, Optional + + # Complex analysis function + async def perform_complex_analysis( + problem: str, + context: Optional[str] = None + ) -> Dict[str, Any]: + prompt = problem + if context: + prompt = f"Given this context: {context}\n\nAnalyze this problem step by step: {problem}" + + full_prompt = f"""{prompt} + +Please provide: +1. A thorough analysis +2. Your reasoning process +3. Specific recommendations + +Format your response clearly with sections.""" + + try: + response = await ai.generate( + model="claude-3-opus", # Most capable model + prompt=full_prompt, + config={ + "temperature": 0.2, + "max_tokens": 2000, + } + ) + + # Parse structured response + sections = response.text.split('\n\n') + analysis = sections[0] if sections else "" + reasoning = sections[1] if len(sections) > 1 else "" + + recommendations = [] + for section in sections[2:]: + lines = section.split('\n') + for line in lines: + if line.strip().startswith('-'): + recommendations.append(line.replace('-', '').strip()) + + return { + "analysis": analysis, + "reasoning": reasoning, + "recommendations": recommendations, + } + except Exception as error: + print(f"Analysis failed: {error}") + return {"analysis": "", "reasoning": "", "recommendations": []} + + # Ethical analysis function + async def perform_ethical_analysis(scenario: str) -> Dict[str, Any]: + prompt = f"""Analyze the ethical implications of this scenario: {scenario} + +Please identify: +1. Key ethical considerations +2. Affected stakeholders +3. 
Recommended approach + +Be thorough and consider multiple perspectives.""" + + try: + response = await ai.generate( + model="claude-3-sonnet", + prompt=prompt, + config={ + "temperature": 0.3, + "max_tokens": 1500, + } + ) + + # Extract structured information + lines = [line.strip() for line in response.text.split('\n') if line.strip()] + + ethical_considerations = [ + line for line in lines + if 'ethical' in line.lower() or 'moral' in line.lower() + ][:5] + + stakeholders = [ + line for line in lines + if 'stakeholder' in line.lower() or 'affected' in line.lower() + ][:5] + + recommendations = ' '.join([ + line for line in lines + if 'recommend' in line.lower() or 'suggest' in line.lower() + ]) + + return { + "ethical_considerations": ethical_considerations, + "stakeholders": stakeholders, + "recommendations": recommendations, + } + except Exception as error: + print(f"Ethical analysis failed: {error}") + return {"ethical_considerations": [], "stakeholders": [], "recommendations": ""} + ``` + + + +### Conversational AI + + + + Build sophisticated conversational applications: + + ```ts + // Advanced conversational flow + export const claudeConversationFlow = ai.defineFlow( + { + name: 'claudeConversationFlow', + inputSchema: z.object({ + message: z.string(), + history: z.array(z.object({ + role: z.enum(['user', 'assistant']), + content: z.string(), + })).optional(), + personality: z.enum(['analytical', 'creative', 'supportive', 'professional']).optional(), + }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ message, history = [], personality = 'analytical' }) => { + const personalityPrompts = { + analytical: 'You are a thoughtful analyst who provides detailed, logical responses.', + creative: 'You are a creative thinker who offers imaginative and innovative perspectives.', + supportive: 'You are a supportive companion who provides encouragement and understanding.', + professional: 'You are a professional consultant who gives clear, actionable advice.', + }; + + const messages = [ + { role: 'system', content: personalityPrompts[personality] }, + ...history, + { role: 'user', content: message }, + ]; + + const response = await ai.generate({ + model: 'claude-3-sonnet', + messages, + config: { + temperature: personality === 'creative' ? 0.8 : 0.6, + maxTokens: 1000, + }, + }); + + return { response: response.text }; + }, + ); + ``` + + + Build sophisticated conversational applications: + + ```go + type ChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` + } + + func handleClaudeConversation(ctx context.Context, message string, history []ChatMessage, personality string) (string, error) { + personalityPrompts := map[string]string{ + "analytical": "You are a thoughtful analyst who provides detailed, logical responses.", + "creative": "You are a creative thinker who offers imaginative and innovative perspectives.", + "supportive": "You are a supportive companion who provides encouragement and understanding.", + "professional": "You are a professional consultant who gives clear, actionable advice.", + } + + systemPrompt, exists := personalityPrompts[personality] + if !exists { + systemPrompt = personalityPrompts["analytical"] + } + + messages := []ChatMessage{ + {Role: "system", Content: systemPrompt}, + } + messages = append(messages, history...) 
+ messages = append(messages, ChatMessage{Role: "user", Content: message}) + + temperature := 0.6 + if personality == "creative" { + temperature = 0.8 + } + + model := claude.Model(g, "claude-3-7-sonnet-20250219") + resp, err := genkit.Generate(ctx, g, + ai.WithModel(model), + ai.WithMessages(messages), + ai.WithConfig(map[string]interface{}{ + "temperature": temperature, + "max_tokens": 1000, + }), + ) + if err != nil { + return "", fmt.Errorf("conversation failed: %w", err) + } + + return resp.Text(), nil + } + ``` + + + Build sophisticated conversational applications: + + ```python + from typing import List, Dict, Optional + + async def handle_claude_conversation( + message: str, + history: List[Dict[str, str]] = None, + personality: str = "analytical" + ) -> str: + if history is None: + history = [] + + personality_prompts = { + "analytical": "You are a thoughtful analyst who provides detailed, logical responses.", + "creative": "You are a creative thinker who offers imaginative and innovative perspectives.", + "supportive": "You are a supportive companion who provides encouragement and understanding.", + "professional": "You are a professional consultant who gives clear, actionable advice.", + } + + system_prompt = personality_prompts.get(personality, personality_prompts["analytical"]) + + messages = [ + {"role": "system", "content": system_prompt}, + *history, + {"role": "user", "content": message}, + ] + + temperature = 0.8 if personality == "creative" else 0.6 + + try: + response = await ai.generate( + model="claude-3-sonnet", + messages=messages, + config={ + "temperature": temperature, + "max_tokens": 1000, + } + ) + return response.text + except Exception as error: + print(f"Conversation failed: {error}") + return "I'm sorry, I couldn't process your message at the moment." + ``` + + + +## Model Comparison + +### Available Models + +| Model | Capabilities | Best For | Context Window | +|-------|-------------|----------|----------------| +| **Claude 3 Haiku** | Fast, efficient | Simple tasks, quick responses | 200K tokens | +| **Claude 3 Sonnet** | Balanced performance | General-purpose tasks, analysis | 200K tokens | +| **Claude 3 Opus** | Most capable | Complex reasoning, research | 200K tokens | +| **Claude 3.5 Sonnet** | Enhanced reasoning | Advanced analysis, coding | 200K tokens | +| **Claude 3.7 Sonnet** | Latest capabilities | Cutting-edge reasoning tasks | 200K tokens | + +## Best Practices + +### Optimizing for Different Tasks + +1. **Analysis and reasoning**: Use Claude 3 Opus or 3.7 Sonnet with low temperature (0.2-0.3) +2. **Creative writing**: Use Claude 3.5 Sonnet with higher temperature (0.7-0.8) +3. **Quick responses**: Use Claude 3 Haiku for speed +4. **Ethical considerations**: Claude models excel at nuanced ethical reasoning + +### Prompt Engineering + +1. **Be specific**: Claude responds well to detailed, structured prompts +2. **Use examples**: Provide examples of desired output format +3. **Request reasoning**: Ask Claude to explain its thinking process +4. **Set context**: Provide relevant background information + +### Safety and Alignment + +1. **Built-in safety**: Claude has strong safety guardrails +2. **Helpful responses**: Models are trained to be helpful, harmless, and honest +3. **Nuanced understanding**: Excellent at understanding context and intent +4. 
**Ethical reasoning**: Strong capability for ethical analysis and decision-making + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [tool calling](/unified-docs/tool-calling) to add interactive capabilities (Note: Tool calling may have limitations with Claude models) +- See [creating flows](/unified-docs/creating-flows) to build structured AI workflows with advanced reasoning +- Check out [deployment guides](/unified-docs/deployment) for production deployment strategies diff --git a/src/content/docs/unified-docs/plugins/deepseek.mdx b/src/content/docs/unified-docs/plugins/deepseek.mdx new file mode 100644 index 00000000..92f28aec --- /dev/null +++ b/src/content/docs/unified-docs/plugins/deepseek.mdx @@ -0,0 +1,1050 @@ +--- +title: DeepSeek Plugin +description: Learn how to use DeepSeek's advanced AI models with Genkit across JavaScript, Go, and Python, including reasoning models, code generation, and cost-effective solutions. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +The DeepSeek plugin provides access to DeepSeek's powerful AI models, including their advanced reasoning models and cost-effective solutions. DeepSeek models are known for their strong performance in coding, mathematics, and reasoning tasks. + +## Installation and Setup + + + + Install the DeepSeek plugin: + + ```bash + npm install @genkit-ai/compat-oai + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { deepSeek } from '@genkit-ai/compat-oai/deepseek'; + + const ai = genkit({ + plugins: [deepSeek()], + }); + ``` + + ### API Key Configuration + + Set your DeepSeek API key using one of these methods: + + ```bash + # Environment variable (recommended) + export DEEPSEEK_API_KEY=your_deepseek_api_key + ``` + + ```ts + // Or pass directly to plugin (not recommended for production) + const ai = genkit({ + plugins: [deepSeek({ apiKey: 'your_deepseek_api_key' })], + }); + ``` + + Get your API key from [DeepSeek Platform](https://platform.deepseek.com/). 
  For Go applications, use the OpenAI-compatible client with DeepSeek endpoints:

  ```go
  package main

  import (
      "context"
      "log"
      "os"

      "github.com/firebase/genkit/go/genkit"
      "github.com/firebase/genkit/go/plugins/openai"
  )

  func main() {
      ctx := context.Background()
      g, err := genkit.Init(ctx,
          genkit.WithPlugins(&openai.OpenAI{
              APIKey: os.Getenv("DEEPSEEK_API_KEY"),
              BaseURL: "https://api.deepseek.com/v1",
              Models: []openai.ModelConfig{
                  {Name: "deepseek-chat", Type: "chat"},
                  {Name: "deepseek-coder", Type: "chat"},
                  {Name: "deepseek-reasoner", Type: "chat"},
              },
          }),
      )
      if err != nil {
          log.Fatal(err)
      }

      // g is now ready to use for defining flows and generating content
      _ = g
  }
  ```

  ### Environment Configuration

  ```bash
  export DEEPSEEK_API_KEY=your_deepseek_api_key
  ```

  For Python applications, use the OpenAI-compatible client:

  ```bash
  pip install genkit-plugin-openai
  ```

  ```python
  import os

  from genkit.ai import Genkit
  from genkit.plugins.openai import OpenAI

  ai = Genkit(
      plugins=[OpenAI(
          api_key=os.getenv("DEEPSEEK_API_KEY"),
          base_url="https://api.deepseek.com/v1",
          models=[
              {"name": "deepseek-chat", "type": "chat"},
              {"name": "deepseek-coder", "type": "chat"},
              {"name": "deepseek-reasoner", "type": "chat"},
          ],
      )],
  )
  ```

  ### Environment Configuration

  ```bash
  export DEEPSEEK_API_KEY=your_deepseek_api_key
  ```

## Basic Usage

### Text Generation

  Use DeepSeek models for text generation:

  ```ts
  import { genkit, z } from 'genkit';
  import { deepSeek } from '@genkit-ai/compat-oai/deepseek';

  const ai = genkit({
    plugins: [deepSeek()],
  });

  // Basic text generation
  const response = await ai.generate({
    model: deepSeek.model('deepseek-chat'),
    prompt: 'Explain the concept of machine learning',
  });

  console.log(response.text);

  // Flow with DeepSeek
  export const deepseekFlow = ai.defineFlow(
    {
      name: 'deepseekFlow',
      inputSchema: z.object({ subject: z.string() }),
      outputSchema: z.object({ information: z.string() }),
    },
    async ({ subject }) => {
      const llmResponse = await ai.generate({
        model: deepSeek.model('deepseek-chat'),
        prompt: `Tell me something about ${subject}.`,
      });
      return { information: llmResponse.text };
    },
  );

  // Advanced reasoning tasks
  const reasoningResponse = await ai.generate({
    model: deepSeek.model('deepseek-reasoner'),
    prompt: 'Solve this step by step: If a train travels 120 km in 2 hours, and then 180 km in 3 hours, what is the average speed for the entire journey?',
    config: {
      temperature: 0.1, // Lower temperature for reasoning tasks
      maxTokens: 1000,
    },
  });
  ```

  Use DeepSeek models with the generation API:

  ```go
  import (
      "context"
      "fmt"
      "log"

      "github.com/firebase/genkit/go/ai"
      "github.com/firebase/genkit/go/genkit"
  )

  func main() {
      ctx := context.Background()
      // Assumes g is the Genkit instance created during setup (see above)

      // Basic text generation
      resp, err := genkit.Generate(ctx, g,
          ai.WithModel("deepseek-chat"),
          ai.WithPrompt("Explain the concept of machine learning"),
      )
      if err != nil {
          log.Fatal(err)
      }

      fmt.Println(resp.Text())

      // Advanced reasoning tasks
      reasoningResp, err := genkit.Generate(ctx, g,
          ai.WithModel("deepseek-reasoner"),
          ai.WithPrompt("Solve this step by step: If a train travels 120 km in 2 hours, and then 180 km in 3 hours, what is the average speed for the entire journey?"),
          ai.WithConfig(map[string]interface{}{
              "temperature": 0.1,
              "max_tokens": 1000,
          }),
      )
      if err != nil {
          log.Fatal(err)
      }

      fmt.Println(reasoningResp.Text())
  }
  ```

  Use DeepSeek
models with the generation API:

  ```python
  import os

  from genkit.ai import Genkit
  from genkit.plugins.openai import OpenAI, openai_name

  ai = Genkit(
      plugins=[OpenAI(
          api_key=os.getenv("DEEPSEEK_API_KEY"),
          base_url="https://api.deepseek.com/v1",
          models=[
              {"name": "deepseek-chat", "type": "chat"},
              {"name": "deepseek-reasoner", "type": "chat"},
          ],
      )],
  )

  # Basic text generation
  response = await ai.generate(
      model=openai_name('deepseek-chat'),
      prompt='Explain the concept of machine learning'
  )
  print(response.text)

  # Advanced reasoning tasks
  reasoning_response = await ai.generate(
      model=openai_name('deepseek-reasoner'),
      prompt='Solve this step by step: If a train travels 120 km in 2 hours, and then 180 km in 3 hours, what is the average speed for the entire journey?',
      config={
          'temperature': 0.1,
          'max_tokens': 1000,
      }
  )
  print(reasoning_response.text)
  ```

### Code Generation

  Use DeepSeek for code generation and programming tasks:

  ```ts
  // Code generation flow
  export const codeGenerationFlow = ai.defineFlow(
    {
      name: 'codeGenerationFlow',
      inputSchema: z.object({
        task: z.string(),
        language: z.string(),
      }),
      outputSchema: z.object({ code: z.string() }),
    },
    async ({ task, language }) => {
      const response = await ai.generate({
        model: deepSeek.model('deepseek-coder'),
        prompt: `Write ${language} code to ${task}. Include comments and error handling.`,
        config: {
          temperature: 0.2, // Lower temperature for code generation
          maxTokens: 2000,
        },
      });
      return { code: response.text };
    },
  );

  // Code review and optimization
  export const codeReviewFlow = ai.defineFlow(
    {
      name: 'codeReviewFlow',
      inputSchema: z.object({ code: z.string() }),
      outputSchema: z.object({
        review: z.string(),
        suggestions: z.array(z.string()),
      }),
    },
    async ({ code }) => {
      const response = await ai.generate({
        model: deepSeek.model('deepseek-coder'),
        prompt: `Review this code and provide suggestions for improvement:\n\n${code}`,
        config: {
          temperature: 0.3,
          maxTokens: 1500,
        },
      });

      // Parse the response to extract review and suggestions
      const lines = response.text.split('\n');
      const review = lines.slice(0, 5).join('\n');
      const suggestions = lines.slice(5).filter(line => line.trim().startsWith('-'));

      return { review, suggestions };
    },
  );
  ```

  Use DeepSeek for code generation and programming tasks:

  ```go
  // Code generation
  func generateCode(ctx context.Context, task, language string) (string, error) {
      prompt := fmt.Sprintf("Write %s code to %s. 
Include comments and error handling.", language, task) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-coder"), + ai.WithPrompt(prompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.2, + "max_tokens": 2000, + }), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + + // Code review and optimization + func reviewCode(ctx context.Context, code string) (string, error) { + prompt := fmt.Sprintf("Review this code and provide suggestions for improvement:\n\n%s", code) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-coder"), + ai.WithPrompt(prompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.3, + "max_tokens": 1500, + }), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + ``` + + + Use DeepSeek for code generation and programming tasks: + + ```python + # Code generation + async def generate_code(task: str, language: str) -> str: + prompt = f"Write {language} code to {task}. Include comments and error handling." + + response = await ai.generate( + model=openai_name('deepseek-coder'), + prompt=prompt, + config={ + 'temperature': 0.2, + 'max_tokens': 2000, + } + ) + return response.text + + # Code review and optimization + async def review_code(code: str) -> dict: + prompt = f"Review this code and provide suggestions for improvement:\n\n{code}" + + response = await ai.generate( + model=openai_name('deepseek-coder'), + prompt=prompt, + config={ + 'temperature': 0.3, + 'max_tokens': 1500, + } + ) + + # Parse the response to extract review and suggestions + lines = response.text.split('\n') + review = '\n'.join(lines[:5]) + suggestions = [line for line in lines[5:] if line.strip().startswith('-')] + + return {'review': review, 'suggestions': suggestions} + ``` + + + +## Advanced Features + +### Mathematical Reasoning + + + + Leverage DeepSeek's mathematical reasoning capabilities: + + ```ts + // Mathematical problem solving + export const mathSolverFlow = ai.defineFlow( + { + name: 'mathSolverFlow', + inputSchema: z.object({ problem: z.string() }), + outputSchema: z.object({ + solution: z.string(), + steps: z.array(z.string()), + }), + }, + async ({ problem }) => { + const response = await ai.generate({ + model: deepSeek.model('deepseek-reasoner'), + prompt: `Solve this mathematical problem step by step: ${problem}`, + config: { + temperature: 0.1, // Very low temperature for mathematical accuracy + maxTokens: 1500, + }, + }); + + // Parse the response to extract solution and steps + const lines = response.text.split('\n').filter(line => line.trim()); + const solution = lines[lines.length - 1]; + const steps = lines.slice(0, -1); + + return { solution, steps }; + }, + ); + + // Statistical analysis + export const statisticsFlow = ai.defineFlow( + { + name: 'statisticsFlow', + inputSchema: z.object({ + data: z.array(z.number()), + analysisType: z.enum(['descriptive', 'inferential', 'regression']), + }), + outputSchema: z.object({ analysis: z.string() }), + }, + async ({ data, analysisType }) => { + const dataStr = data.join(', '); + const prompt = `Perform ${analysisType} statistical analysis on this data: [${dataStr}]. 
Provide detailed calculations and interpretations.`; + + const response = await ai.generate({ + model: deepSeek.model('deepseek-reasoner'), + prompt, + config: { + temperature: 0.2, + maxTokens: 2000, + }, + }); + + return { analysis: response.text }; + }, + ); + ``` + + + Leverage DeepSeek's mathematical reasoning capabilities: + + ```go + // Mathematical problem solving + func solveMathProblem(ctx context.Context, problem string) (string, []string, error) { + prompt := fmt.Sprintf("Solve this mathematical problem step by step: %s", problem) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-reasoner"), + ai.WithPrompt(prompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.1, + "max_tokens": 1500, + }), + ) + if err != nil { + return "", nil, err + } + + lines := strings.Split(resp.Text(), "\n") + var steps []string + var solution string + + for _, line := range lines { + if strings.TrimSpace(line) != "" { + steps = append(steps, line) + } + } + + if len(steps) > 0 { + solution = steps[len(steps)-1] + steps = steps[:len(steps)-1] + } + + return solution, steps, nil + } + + // Statistical analysis + func performStatisticalAnalysis(ctx context.Context, data []float64, analysisType string) (string, error) { + dataStr := make([]string, len(data)) + for i, v := range data { + dataStr[i] = fmt.Sprintf("%.2f", v) + } + + prompt := fmt.Sprintf("Perform %s statistical analysis on this data: [%s]. Provide detailed calculations and interpretations.", + analysisType, strings.Join(dataStr, ", ")) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-reasoner"), + ai.WithPrompt(prompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.2, + "max_tokens": 2000, + }), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + ``` + + + Leverage DeepSeek's mathematical reasoning capabilities: + + ```python + from typing import List + + # Mathematical problem solving + async def solve_math_problem(problem: str) -> dict: + prompt = f"Solve this mathematical problem step by step: {problem}" + + response = await ai.generate( + model=openai_name('deepseek-reasoner'), + prompt=prompt, + config={ + 'temperature': 0.1, + 'max_tokens': 1500, + } + ) + + lines = [line for line in response.text.split('\n') if line.strip()] + solution = lines[-1] if lines else "" + steps = lines[:-1] if len(lines) > 1 else [] + + return {'solution': solution, 'steps': steps} + + # Statistical analysis + async def perform_statistical_analysis(data: List[float], analysis_type: str) -> str: + data_str = ', '.join([f"{x:.2f}" for x in data]) + prompt = f"Perform {analysis_type} statistical analysis on this data: [{data_str}]. Provide detailed calculations and interpretations." 
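        # Low temperature on the reasoner model keeps the calculations consistent across runs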
+ + response = await ai.generate( + model=openai_name('deepseek-reasoner'), + prompt=prompt, + config={ + 'temperature': 0.2, + 'max_tokens': 2000, + } + ) + + return response.text + ``` + + + +### Conversational AI + + + + Build conversational applications with DeepSeek: + + ```ts + // Conversational chat flow + export const chatFlow = ai.defineFlow( + { + name: 'chatFlow', + inputSchema: z.object({ + message: z.string(), + history: z.array(z.object({ + role: z.enum(['user', 'assistant']), + content: z.string(), + })).optional(), + mode: z.enum(['general', 'coding', 'reasoning']).optional(), + }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ message, history = [], mode = 'general' }) => { + // Select model based on conversation mode + const modelMap = { + general: 'deepseek-chat', + coding: 'deepseek-coder', + reasoning: 'deepseek-reasoner', + }; + + // Build conversation context + const messages = [ + { role: 'system', content: `You are a helpful AI assistant specialized in ${mode} tasks.` }, + ...history, + { role: 'user', content: message }, + ]; + + const response = await ai.generate({ + model: deepSeek.model(modelMap[mode]), + messages, + config: { + temperature: mode === 'reasoning' ? 0.1 : 0.7, + maxTokens: 1500, + }, + }); + + return { response: response.text }; + }, + ); + + // Multi-turn reasoning conversation + export const reasoningChatFlow = ai.defineFlow( + { + name: 'reasoningChatFlow', + inputSchema: z.object({ + question: z.string(), + context: z.string().optional(), + }), + outputSchema: z.object({ + answer: z.string(), + reasoning: z.string(), + }), + }, + async ({ question, context }) => { + const prompt = context + ? `Given this context: ${context}\n\nAnswer this question with detailed reasoning: ${question}` + : `Answer this question with detailed reasoning: ${question}`; + + const response = await ai.generate({ + model: deepSeek.model('deepseek-reasoner'), + prompt, + config: { + temperature: 0.2, + maxTokens: 2000, + }, + }); + + // Split response into answer and reasoning + const parts = response.text.split('\n\n'); + const answer = parts[parts.length - 1]; + const reasoning = parts.slice(0, -1).join('\n\n'); + + return { answer, reasoning }; + }, + ); + ``` + + + Build conversational applications with DeepSeek: + + ```go + type ChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` + } + + func handleChat(ctx context.Context, message string, history []ChatMessage, mode string) (string, error) { + // Select model based on conversation mode + modelMap := map[string]string{ + "general": "deepseek-chat", + "coding": "deepseek-coder", + "reasoning": "deepseek-reasoner", + } + + model, exists := modelMap[mode] + if !exists { + model = "deepseek-chat" + } + + // Build conversation context + messages := []ChatMessage{ + {Role: "system", Content: fmt.Sprintf("You are a helpful AI assistant specialized in %s tasks.", mode)}, + } + messages = append(messages, history...) 
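    // Close the transcript with the current user message, then pick a temperature for the mode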
+ messages = append(messages, ChatMessage{Role: "user", Content: message}) + + temperature := 0.7 + if mode == "reasoning" { + temperature = 0.1 + } + + resp, err := genkit.Generate(ctx, g, + ai.WithModel(model), + ai.WithMessages(messages), + ai.WithConfig(map[string]interface{}{ + "temperature": temperature, + "max_tokens": 1500, + }), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + ``` + + + Build conversational applications with DeepSeek: + + ```python + from typing import List, Dict, Optional + + async def handle_chat( + message: str, + history: List[Dict[str, str]] = None, + mode: str = 'general' + ) -> str: + if history is None: + history = [] + + # Select model based on conversation mode + model_map = { + 'general': 'deepseek-chat', + 'coding': 'deepseek-coder', + 'reasoning': 'deepseek-reasoner', + } + + model = model_map.get(mode, 'deepseek-chat') + + # Build conversation context + messages = [ + {"role": "system", "content": f"You are a helpful AI assistant specialized in {mode} tasks."}, + *history, + {"role": "user", "content": message}, + ] + + temperature = 0.1 if mode == 'reasoning' else 0.7 + + response = await ai.generate( + model=openai_name(model), + messages=messages, + config={ + 'temperature': temperature, + 'max_tokens': 1500, + } + ) + + return response.text + ``` + + + +## Model Comparison + +### Available Models + +| Model | Capabilities | Best For | Context Window | +|-------|-------------|----------|----------------| +| **deepseek-chat** | General conversation, reasoning | General-purpose tasks, Q&A | 32K tokens | +| **deepseek-coder** | Code generation, programming | Software development, code review | 32K tokens | +| **deepseek-reasoner** | Advanced reasoning, mathematics | Complex problem solving, analysis | 32K tokens | + +### Performance Characteristics + + + + ```ts + // Performance comparison example + const performanceTest = async () => { + const prompt = "Explain the time complexity of quicksort algorithm"; + + // General model + const startGeneral = Date.now(); + const generalResponse = await ai.generate({ + model: deepSeek.model('deepseek-chat'), + prompt, + }); + const generalTime = Date.now() - startGeneral; + + // Specialized coder model + const startCoder = Date.now(); + const coderResponse = await ai.generate({ + model: deepSeek.model('deepseek-coder'), + prompt, + }); + const coderTime = Date.now() - startCoder; + + console.log(`General: ${generalTime}ms, Coder: ${coderTime}ms`); + console.log(`General length: ${generalResponse.text.length}, Coder length: ${coderResponse.text.length}`); + }; + ``` + + + ```go + func performanceTest(ctx context.Context) { + prompt := "Explain the time complexity of quicksort algorithm" + + // General model + startGeneral := time.Now() + generalResp, _ := genkit.Generate(ctx, g, + ai.WithModel("deepseek-chat"), + ai.WithPrompt(prompt), + ) + generalTime := time.Since(startGeneral) + + // Specialized coder model + startCoder := time.Now() + coderResp, _ := genkit.Generate(ctx, g, + ai.WithModel("deepseek-coder"), + ai.WithPrompt(prompt), + ) + coderTime := time.Since(startCoder) + + fmt.Printf("General: %v, Coder: %v\n", generalTime, coderTime) + fmt.Printf("General length: %d, Coder length: %d\n", + len(generalResp.Text()), len(coderResp.Text())) + } + ``` + + + ```python + import time + + async def performance_test(): + prompt = "Explain the time complexity of quicksort algorithm" + + # General model + start_general = time.time() + general_response = await ai.generate( + 
model=openai_name('deepseek-chat'), + prompt=prompt + ) + general_time = time.time() - start_general + + # Specialized coder model + start_coder = time.time() + coder_response = await ai.generate( + model=openai_name('deepseek-coder'), + prompt=prompt + ) + coder_time = time.time() - start_coder + + print(f"General: {general_time:.2f}s, Coder: {coder_time:.2f}s") + print(f"General length: {len(general_response.text)}, Coder length: {len(coder_response.text)}") + ``` + + + +## Advanced Configuration + +### Custom Model Configuration + + + + ```ts + // Advanced configuration with passthrough options + const response = await ai.generate({ + model: deepSeek.model('deepseek-chat'), + prompt: 'Analyze the latest developments in AI', + config: { + temperature: 0.7, + maxTokens: 2000, + topP: 0.9, + frequencyPenalty: 0.1, + presencePenalty: 0.1, + // Passthrough configuration for new features + stream: true, + logprobs: true, + top_logprobs: 5, + }, + }); + + // Environment-specific configuration + const environmentConfig = { + development: { + model: deepSeek.model('deepseek-chat'), + temperature: 0.8, + maxTokens: 1000, + }, + production: { + model: deepSeek.model('deepseek-reasoner'), + temperature: 0.3, + maxTokens: 2000, + }, + }; + + const config = environmentConfig[process.env.NODE_ENV || 'development']; + ``` + + + ```go + // Advanced configuration + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-chat"), + ai.WithPrompt("Analyze the latest developments in AI"), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.7, + "max_tokens": 2000, + "top_p": 0.9, + "frequency_penalty": 0.1, + "presence_penalty": 0.1, + "stream": true, + "logprobs": true, + "top_logprobs": 5, + }), + ) + ``` + + + ```python + # Advanced configuration + response = await ai.generate( + model=openai_name('deepseek-chat'), + prompt='Analyze the latest developments in AI', + config={ + 'temperature': 0.7, + 'max_tokens': 2000, + 'top_p': 0.9, + 'frequency_penalty': 0.1, + 'presence_penalty': 0.1, + 'stream': True, + 'logprobs': True, + 'top_logprobs': 5, + } + ) + ``` + + + +## Best Practices + +### Optimizing for Different Tasks + +1. **General conversation**: Use `deepseek-chat` with moderate temperature (0.7) +2. **Code generation**: Use `deepseek-coder` with low temperature (0.2) +3. **Mathematical reasoning**: Use `deepseek-reasoner` with very low temperature (0.1) +4. **Creative writing**: Use `deepseek-chat` with higher temperature (0.8-0.9) + +### Cost Optimization + +1. **Choose the right model**: Use specialized models for their intended tasks +2. **Optimize token usage**: Be specific in prompts and set appropriate `maxTokens` +3. **Cache responses**: Cache frequently requested computations +4. 
**Batch similar requests**: Group related queries when possible + +### Error Handling + + + + ```ts + const robustDeepSeekFlow = ai.defineFlow( + { + name: 'robustDeepSeekFlow', + inputSchema: z.object({ query: z.string() }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ query }) => { + try { + const response = await ai.generate({ + model: deepSeek.model('deepseek-chat'), + prompt: query, + config: { + temperature: 0.7, + maxTokens: 1000, + }, + }); + return { response: response.text }; + } catch (error) { + if (error.message.includes('rate_limit')) { + // Fallback to reasoning model with lower token limit + const fallbackResponse = await ai.generate({ + model: deepSeek.model('deepseek-reasoner'), + prompt: query, + config: { + maxTokens: 500, + }, + }); + return { response: fallbackResponse.text }; + } + throw error; + } + }, + ); + ``` + + + ```go + func robustDeepSeekGenerate(ctx context.Context, query string) (string, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithModel("deepseek-chat"), + ai.WithPrompt(query), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.7, + "max_tokens": 1000, + }), + ) + + if err != nil { + if strings.Contains(err.Error(), "rate_limit") { + // Fallback to reasoning model + fallbackResp, fallbackErr := genkit.Generate(ctx, g, + ai.WithModel("deepseek-reasoner"), + ai.WithPrompt(query), + ai.WithConfig(map[string]interface{}{ + "max_tokens": 500, + }), + ) + if fallbackErr != nil { + return "", fallbackErr + } + return fallbackResp.Text(), nil + } + return "", err + } + + return resp.Text(), nil + } + ``` + + + ```python + async def robust_deepseek_generate(query: str) -> str: + try: + response = await ai.generate( + model=openai_name('deepseek-chat'), + prompt=query, + config={ + 'temperature': 0.7, + 'max_tokens': 1000, + } + ) + return response.text + except Exception as error: + if 'rate_limit' in str(error): + # Fallback to reasoning model + fallback_response = await ai.generate( + model=openai_name('deepseek-reasoner'), + prompt=query, + config={ + 'max_tokens': 500, + } + ) + return fallback_response.text + raise error + ``` + + + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [tool calling](/unified-docs/tool-calling) to add interactive capabilities to your DeepSeek applications +- See [creating flows](/unified-docs/creating-flows) to build structured AI workflows with reasoning capabilities +- Check out [deployment guides](/unified-docs/deployment) for production deployment strategies diff --git a/src/content/docs/unified-docs/plugins/google-ai.mdx b/src/content/docs/unified-docs/plugins/google-ai.mdx new file mode 100644 index 00000000..4b4ecec7 --- /dev/null +++ b/src/content/docs/unified-docs/plugins/google-ai.mdx @@ -0,0 +1,589 @@ +--- +title: Google AI plugin +description: Learn how to use Google's Gemini models with Genkit across JavaScript, Go, and Python, including text generation, embeddings, TTS, video generation, and context caching. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +The Google AI plugin provides interfaces to Google's Gemini models through the [Gemini API](https://ai.google.dev/docs/gemini_api_overview), offering powerful text generation, embeddings, text-to-speech, video generation, and context caching capabilities. 
## Installation and Setup

  Install the Google AI plugin:

  ```bash
  npm install @genkit-ai/googleai
  ```

  Configure the plugin when initializing Genkit:

  ```ts
  import { genkit } from 'genkit';
  import { googleAI } from '@genkit-ai/googleai';

  const ai = genkit({
    plugins: [googleAI()],
  });
  ```

  The Google AI plugin is included with the Genkit Go package:

  ```go
  import (
      "context"
      "log"

      "github.com/firebase/genkit/go/genkit"
      "github.com/firebase/genkit/go/plugins/googlegenai"
  )

  func main() {
      ctx := context.Background()
      g, err := genkit.Init(ctx,
          genkit.WithPlugins(&googlegenai.GoogleAI{}),
      )
      if err != nil {
          log.Fatal(err)
      }

      // g is now ready to use for defining flows and generating content
      _ = g
  }
  ```

  Install the Google AI plugin:

  ```bash
  pip install genkit-plugin-google-genai
  ```

  Configure the plugin when initializing Genkit:

  ```python
  from genkit.ai import Genkit
  from genkit.plugins.google_genai import GoogleGenai

  ai = Genkit(
      plugins=[GoogleGenai()],
  )
  ```

## API Key Configuration

The plugin requires an API key for the Gemini API, which you can get from [Google AI Studio](https://aistudio.google.com/app/apikey).

  Configure your API key by doing one of the following:

  - Set the `GEMINI_API_KEY` environment variable:
    ```bash
    export GEMINI_API_KEY=your_api_key_here
    ```

  - Specify the API key when initializing the plugin:
    ```ts
    googleAI({ apiKey: yourKey });
    ```

  :::caution
  Don't embed your API key directly in code! Use environment variables or a service like Cloud Secret Manager.
  :::

  Set the `GEMINI_API_KEY` environment variable:

  ```bash
  export GEMINI_API_KEY=your_api_key_here
  ```

  The plugin will automatically use this environment variable.

  Set the `GEMINI_API_KEY` environment variable:

  ```bash
  export GEMINI_API_KEY=your_api_key_here
  ```

  The plugin will automatically use this environment variable.
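
  Alternatively, you can pass the key when constructing the plugin, for example after loading it from a secret manager. The `api_key` parameter in this sketch is an assumption; check the plugin reference for the exact argument name:

  ```python
  # Sketch only: assumes the plugin accepts an api_key argument.
  # Prefer environment variables or a secret manager in production.
  ai = Genkit(
      plugins=[GoogleGenai(api_key=your_key)],
  )
  ```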
## Basic Usage

  Use the helper functions to reference models and embedders:

  ```ts
  import { googleAI } from '@genkit-ai/googleai';

  // Referencing models
  const model = googleAI.model('gemini-2.5-flash');
  const modelLite = googleAI.model('gemini-2.5-flash-lite');

  // Referencing embedders
  const embedder = googleAI.embedder('gemini-embedding-001');

  // Set default model
  const ai = genkit({
    plugins: [googleAI()],
    model: googleAI.model('gemini-2.5-flash'),
  });

  // Generate content
  const llmResponse = await ai.generate('Tell me a joke.');

  // Generate embeddings
  const embeddings = await ai.embed({
    embedder: googleAI.embedder('gemini-embedding-001'),
    content: 'Hello world',
  });
  ```

  Use the models directly with the generation API:

  ```go
  import (
      "context"
      "fmt"
      "log"

      "github.com/firebase/genkit/go/ai"
      "github.com/firebase/genkit/go/genkit"
      "github.com/firebase/genkit/go/plugins/googlegenai"
  )

  func main() {
      ctx := context.Background()
      g, err := genkit.Init(ctx,
          genkit.WithPlugins(&googlegenai.GoogleAI{}),
          genkit.WithDefaultModel("googleai/gemini-2.5-flash"),
      )
      if err != nil {
          log.Fatal(err)
      }

      // Generate content
      resp, err := genkit.Generate(ctx, g,
          ai.WithPrompt("Tell me a joke."),
      )
      if err != nil {
          log.Fatal(err)
      }

      fmt.Println(resp.Text())
  }
  ```

  Use the models with the generation API:

  ```python
  from genkit.ai import Genkit
  from genkit.plugins.google_genai import GoogleGenai, google_genai_name

  ai = Genkit(
      plugins=[GoogleGenai()],
      model=google_genai_name('gemini-2.5-flash'),
  )

  # Generate content
  response = await ai.generate('Tell me a joke.')
  print(response.text)

  # Generate embeddings
  embeddings = await ai.embed(
      embedder=google_genai_name('gemini-embedding-001'),
      content='Hello world',
  )
  ```

## Working with Files

  You can use files uploaded to the Gemini Files API:

  ```ts
  import { GoogleAIFileManager } from '@google/generative-ai/server';

  const fileManager = new GoogleAIFileManager(process.env.GEMINI_API_KEY);
  const uploadResult = await fileManager.uploadFile('path/to/file.jpg', {
    mimeType: 'image/jpeg',
    displayName: 'Your Image',
  });

  const response = await ai.generate({
    model: googleAI.model('gemini-2.5-flash'),
    prompt: [
      { text: 'Describe this image:' },
      {
        media: {
          contentType: uploadResult.file.mimeType,
          url: uploadResult.file.uri,
        },
      },
    ],
  });
  ```

  File handling in Go requires using the Google AI SDK directly for file uploads, then referencing the files in Genkit:

  ```go
  // Upload files using the Google AI SDK, then reference in Genkit
  // File upload implementation depends on your specific use case

  resp, err := genkit.Generate(ctx, g,
      ai.WithPrompt("Describe this image:"),
      ai.WithMedia(&ai.Media{
          ContentType: "image/jpeg",
          URL: "uploaded_file_uri",
      }),
  )
  ```

  File handling in Python requires using the Google AI SDK for uploads:

  ```python
  # Upload files using the Google AI SDK, then reference in Genkit
  # File upload implementation depends on your specific use case

  response = await ai.generate(
      prompt=[
          {'text': 'Describe this image:'},
          {
              'media': {
                  'contentType': 'image/jpeg',
                  'url': 'uploaded_file_uri',
              }
          }
      ],
      model=google_genai_name('gemini-2.5-flash'),
  )
  ```

## Fine-tuned Models

  You can use models fine-tuned with the Google Gemini API.
Follow the instructions from the [Gemini API](https://ai.google.dev/gemini-api/docs/model-tuning/tutorial?lang=python) or fine-tune using [AI Studio](https://aistudio.google.com/app/tune).

  When calling a tuned model, use the tuned model's ID directly:

  ```ts
  const llmResponse = await ai.generate({
    prompt: 'Suggest an item for the menu of a fish-themed restaurant',
    model: googleAI.model('tunedModels/my-example-model-apbm8oqbvuv2'),
  });
  ```

  Use fine-tuned models by specifying the tuned model ID:

  ```go
  resp, err := genkit.Generate(ctx, g,
      ai.WithPrompt("Suggest an item for the menu of a fish-themed restaurant"),
      ai.WithModelName("googleai/tunedModels/my-example-model-apbm8oqbvuv2"),
  )
  ```

  Use fine-tuned models by specifying the tuned model ID:

  ```python
  response = await ai.generate(
      prompt='Suggest an item for the menu of a fish-themed restaurant',
      model=google_genai_name('tunedModels/my-example-model-apbm8oqbvuv2'),
  )
  ```

## Text-to-Speech (TTS)

  Generate audio using the Gemini TTS model:

  ```ts
  import { writeFile } from 'node:fs/promises';

  const { media } = await ai.generate({
    model: googleAI.model('gemini-2.5-flash-preview-tts'),
    config: {
      responseModalities: ['AUDIO'],
      speechConfig: {
        voiceConfig: {
          prebuiltVoiceConfig: { voiceName: 'Algenib' },
        },
      },
    },
    prompt: 'Say that Genkit is an amazing Gen AI library',
  });

  if (media) {
    const audioBuffer = Buffer.from(
      media.url.substring(media.url.indexOf(',') + 1),
      'base64'
    );
    await writeFile('output.wav', audioBuffer);
  }
  ```

  ### Multi-speaker Audio

  Generate audio with multiple speakers:

  ```ts
  const response = await ai.generate({
    model: googleAI.model('gemini-2.5-flash-preview-tts'),
    config: {
      responseModalities: ['AUDIO'],
      speechConfig: {
        multiSpeakerVoiceConfig: {
          speakerVoiceConfigs: [
            {
              speaker: 'Speaker1',
              voiceConfig: {
                prebuiltVoiceConfig: { voiceName: 'Algenib' },
              },
            },
            {
              speaker: 'Speaker2',
              voiceConfig: {
                prebuiltVoiceConfig: { voiceName: 'Achernar' },
              },
            },
          ],
        },
      },
    },
    prompt: `Here's the dialog:
    Speaker1: "Genkit is an amazing Gen AI library!"
    Speaker2: "I thought it was a framework."`,
  });
  ```

  Text-to-speech functionality is currently available primarily in JavaScript. For Go applications, you would need to:

  1. Use the Google AI SDK directly for TTS functionality
  2. Or call a JavaScript-based service that handles TTS
  3. Or use Google Cloud Text-to-Speech API separately

  ```go
  // TTS is not directly supported in Go Genkit
  // Consider using Google Cloud Text-to-Speech API or
  // a JavaScript service for TTS functionality
  ```

  Text-to-speech functionality is currently available primarily in JavaScript. For Python applications, you would need to:

  1. Use the Google AI SDK directly for TTS functionality
  2. Or call a JavaScript-based service that handles TTS
  3. 
Or use Google Cloud Text-to-Speech API separately + + ```python + # TTS is not directly supported in Python Genkit + # Consider using Google Cloud Text-to-Speech API or + # a JavaScript service for TTS functionality + ``` + + + +## Video Generation (Veo) + + + + Generate videos using the Veo models: + + ```ts + const videoFlow = ai.defineFlow('text-to-video-veo', async () => { + let { operation } = await ai.generate({ + model: googleAI.model('veo-2.0-generate-001'), + prompt: 'A majestic dragon soaring over a mystical forest at dawn.', + config: { + durationSeconds: 5, + aspectRatio: '16:9', + }, + }); + + if (!operation) { + throw new Error('Expected the model to return an operation'); + } + + // Wait until the operation completes + while (!operation.done) { + operation = await ai.checkOperation(operation); + await new Promise((resolve) => setTimeout(resolve, 5000)); + } + + if (operation.error) { + throw new Error('Failed to generate video: ' + operation.error.message); + } + + const video = operation.output?.message?.content.find((p) => !!p.media); + if (!video) { + throw new Error('Failed to find the generated video'); + } + + return video; + }); + ``` + + ### Video from Photo Reference + + ```ts + const startingImage = fs.readFileSync('photo.jpg', { encoding: 'base64' }); + + let { operation } = await ai.generate({ + model: googleAI.model('veo-2.0-generate-001'), + prompt: [ + { text: 'make the subject in the photo move' }, + { + media: { + contentType: 'image/jpeg', + url: `data:image/jpeg;base64,${startingImage}`, + }, + }, + ], + config: { + durationSeconds: 5, + aspectRatio: '9:16', + personGeneration: 'allow_adult', + }, + }); + ``` + + + Video generation functionality is currently available primarily in JavaScript. For Go applications, you would need to: + + 1. Use the Google AI SDK directly for video generation + 2. Or call a JavaScript-based service that handles video generation + 3. Or implement video generation using the Gemini API directly + + ```go + // Video generation is not directly supported in Go Genkit + // Consider using the Google AI SDK directly or + // a JavaScript service for video generation functionality + ``` + + + Video generation functionality is currently available primarily in JavaScript. For Python applications, you would need to: + + 1. Use the Google AI SDK directly for video generation + 2. Or call a JavaScript-based service that handles video generation + 3. Or implement video generation using the Gemini API directly + + ```python + # Video generation is not directly supported in Python Genkit + # Consider using the Google AI SDK directly or + # a JavaScript service for video generation functionality + ``` + + + +## Context Caching + + + + Context caching allows models to reuse previously cached content to optimize performance: + + ```ts + const llmResponse = await ai.generate({ + messages: [ + { + role: 'user', + content: [{ text: 'Here is the relevant text from War and Peace.' 
}], + }, + { + role: 'model', + content: [ + { + text: 'Based on War and Peace, here is some analysis of Pierre Bezukhov\'s character.', + }, + ], + metadata: { + cache: { + ttlSeconds: 300, // Cache this message for 5 minutes + }, + }, + }, + ], + model: googleAI.model('gemini-2.5-flash-001'), + prompt: 'Describe Pierre\'s transformation throughout the novel', + }); + ``` + + ### Caching Large Documents + + ```ts + const textContent = await fs.readFile('path/to/war_and_peace.txt', 'utf-8'); + + const llmResponse = await ai.generate({ + messages: [ + { + role: 'user', + content: [{ text: textContent }], + }, + { + role: 'model', + content: [ + { + text: 'This analysis is based on the provided text from War and Peace.', + }, + ], + metadata: { + cache: { + ttlSeconds: 300, + }, + }, + }, + ], + model: googleAI.model('gemini-2.5-flash-001'), + prompt: 'Analyze the relationship between Pierre and Natasha.', + }); + ``` + + + Context caching functionality is currently available primarily in JavaScript. For Go applications, you would need to implement caching manually or use the Google AI SDK directly. + + ```go + // Context caching is not directly supported in Go Genkit + // Consider implementing your own caching layer or + // using the Google AI SDK directly for caching functionality + ``` + + + Context caching functionality is currently available primarily in JavaScript. For Python applications, you would need to implement caching manually or use the Google AI SDK directly. + + ```python + # Context caching is not directly supported in Python Genkit + # Consider implementing your own caching layer or + # using the Google AI SDK directly for caching functionality + ``` + + + +## Available Models + +The Google AI plugin supports various Gemini models: + +- **Text Generation**: `gemini-2.5-flash`, `gemini-2.5-flash-lite`, `gemini-1.5-pro` +- **Embeddings**: `gemini-embedding-001` +- **Text-to-Speech**: `gemini-2.5-flash-preview-tts` +- **Video Generation**: `veo-2.0-generate-001`, `veo-3.0-generate-preview` + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [tool calling](/unified-docs/tool-calling) to add interactive capabilities to your AI applications +- See [creating flows](/unified-docs/creating-flows) to build structured AI workflows +- Check out [context](/unified-docs/context) for managing information flow in your applications diff --git a/src/content/docs/unified-docs/plugins/mcp.mdx b/src/content/docs/unified-docs/plugins/mcp.mdx new file mode 100644 index 00000000..8ab8f73f --- /dev/null +++ b/src/content/docs/unified-docs/plugins/mcp.mdx @@ -0,0 +1,1048 @@ +--- +title: Model Context Protocol (MCP) Plugin +description: Learn how to integrate MCP servers with Genkit across JavaScript, Go, and Python for extensible tool and resource management. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +The Model Context Protocol (MCP) plugin enables integration between Genkit and the [Model Context Protocol](https://modelcontextprotocol.io), an open standard for connecting AI applications with external tools, resources, and prompts. 
MCP allows you to: + +- **Consume MCP tools and resources** from external servers as a client +- **Expose Genkit tools and prompts** as an MCP server for other applications +- **Manage multiple MCP connections** for complex workflows + +## Installation and Setup + + + + Install the MCP plugin: + + ```bash + npm install genkit @genkit-ai/mcp + ``` + + Basic setup: + + ```ts + import { genkit } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import { createMcpHost } from '@genkit-ai/mcp'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + // Create MCP host to manage multiple servers + const mcpHost = createMcpHost({ + name: 'myMcpClients', + mcpServers: { + fs: { + command: 'npx', + args: ['-y', '@modelcontextprotocol/server-filesystem', process.cwd()], + }, + memory: { + command: 'npx', + args: ['-y', '@modelcontextprotocol/server-memory'], + }, + }, + }); + ``` + + + Import the MCP package: + + ```bash + go get github.com/firebase/genkit/go/plugins/mcp + ``` + + Basic setup: + + ```go + package main + + import ( + "context" + "log" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/mcp" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx) + if err != nil { + log.Fatal(err) + } + + // Create MCP manager for multiple servers + manager, err := mcp.NewMCPManager(mcp.MCPManagerOptions{ + Name: "my-app", + MCPServers: []mcp.MCPServerConfig{ + { + Name: "time-server", + Config: mcp.MCPClientOptions{ + Name: "mcp-server-time", + Stdio: &mcp.StdioConfig{ + Command: "uvx", + Args: []string{"mcp-server-time"}, + }, + }, + }, + }, + }) + if err != nil { + log.Fatal(err) + } + } + ``` + + + Install the MCP plugin: + + ```bash + pip install genkit-mcp + ``` + + Basic setup: + + ```python + import asyncio + from genkit.ai import Genkit + from genkit.plugins.mcp import MCPPlugin + + async def main(): + ai = Genkit( + plugins=[ + MCPPlugin( + name="my-mcp-client", + servers={ + "filesystem": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "."], + }, + "memory": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-memory"], + }, + }, + ), + ], + ) + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + +## MCP Client Usage + +### Connecting to MCP Servers + + + + #### Multiple Servers with MCP Host + + ```ts + import { createMcpHost } from '@genkit-ai/mcp'; + + const mcpHost = createMcpHost({ + name: 'myMcpClients', + mcpServers: { + // Filesystem server + fs: { + command: 'npx', + args: ['-y', '@modelcontextprotocol/server-filesystem', process.cwd()], + }, + // Memory server + memory: { + command: 'npx', + args: ['-y', '@modelcontextprotocol/server-memory'], + }, + // Remote HTTP server + remote: { + url: 'https://api.example.com/mcp', + headers: { + 'Authorization': 'Bearer your-token', + }, + }, + }, + rawToolResponses: false, // Process responses for better compatibility + }); + + // Use tools from all connected servers + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Analyze all files in ${process.cwd()} and remember key findings.`, + tools: await mcpHost.getActiveTools(ai), + resources: await mcpHost.getActiveResources(ai), + }); + + // Clean up when done + await mcpHost.close(); + ``` + + #### Single Server with MCP Client + + ```ts + import { createMcpClient } from '@genkit-ai/mcp'; + + const fsClient = createMcpClient({ + name: 'myFileSystemClient', + mcpServer: { + command: 'npx', + args: ['-y', 
'@modelcontextprotocol/server-filesystem', process.cwd()], + }, + }); + + await fsClient.ready(); + + // Get tools from this specific client + const fsTools = await fsClient.getActiveTools(ai); + + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: 'List files in the current directory', + tools: fsTools, + }); + + await fsClient.disable(); + ``` + + + #### Multiple Servers with Manager + + ```go + manager, err := mcp.NewMCPManager(mcp.MCPManagerOptions{ + Name: "my-app", + MCPServers: []mcp.MCPServerConfig{ + { + Name: "filesystem", + Config: mcp.MCPClientOptions{ + Name: "filesystem-server", + Stdio: &mcp.StdioConfig{ + Command: "npx", + Args: []string{"-y", "@modelcontextprotocol/server-filesystem", "."}, + }, + }, + }, + { + Name: "time", + Config: mcp.MCPClientOptions{ + Name: "time-server", + Stdio: &mcp.StdioConfig{ + Command: "uvx", + Args: []string{"mcp-server-time"}, + }, + }, + }, + }, + }) + if err != nil { + log.Fatal(err) + } + + // Get all tools from all active servers + tools, err := manager.GetActiveTools(ctx, g) + if err != nil { + log.Fatal(err) + } + + // Use tools in generation + resp, err := genkit.Generate(ctx, g, + ai.WithModel(myModel), + ai.WithPrompt("What time is it and list files in current directory?"), + ai.WithTools(tools...), + ) + if err != nil { + log.Fatal(err) + } + ``` + + #### Single Server Client + + ```go + client, err := mcp.NewGenkitMCPClient(mcp.MCPClientOptions{ + Name: "time-client", + Stdio: &mcp.StdioConfig{ + Command: "uvx", + Args: []string{"mcp-server-time"}, + }, + }) + if err != nil { + log.Fatal(err) + } + + // Get a specific tool + timeTool, err := client.GetTool(ctx, g, "get_current_time") + if err != nil { + log.Fatal(err) + } + + // Use the tool + resp, err := genkit.Generate(ctx, g, + ai.WithModel(myModel), + ai.WithPrompt("What time is it?"), + ai.WithTools(timeTool), + ) + if err != nil { + log.Fatal(err) + } + ``` + + + #### Multiple Servers + + ```python + from genkit.plugins.mcp import MCPPlugin + + mcp_plugin = MCPPlugin( + name="my-mcp-client", + servers={ + "filesystem": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "."], + }, + "memory": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-memory"], + }, + "time": { + "command": "uvx", + "args": ["mcp-server-time"], + }, + }, + ) + + ai = Genkit(plugins=[mcp_plugin]) + + # Get all available tools + tools = await mcp_plugin.get_active_tools() + + # Use tools in generation + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt="What time is it and what files are in the current directory?", + tools=tools, + ) + ``` + + #### Single Server + + ```python + from genkit.plugins.mcp import MCPClient + + client = MCPClient( + name="time-client", + server_config={ + "command": "uvx", + "args": ["mcp-server-time"], + }, + ) + + await client.connect() + + # Get specific tool + time_tool = await client.get_tool("get_current_time") + + # Use the tool + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt="What time is it?", + tools=[time_tool], + ) + + await client.disconnect() + ``` + + + +### Using MCP Tools and Resources + + + + ```ts + // Get tools from specific servers + const fsTools = await mcpHost.getActiveTools(ai, ['fs']); + const memoryTools = await mcpHost.getActiveTools(ai, ['memory']); + + // Get all tools + const allTools = await mcpHost.getActiveTools(ai); + + // Get resources + const resources = await mcpHost.getActiveResources(ai); + 
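  // (Resources are read-only data, such as files or memory entries, that servers expose as context)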
+ // Get prompts from a specific server + const prompt = await mcpHost.getPrompt('memory', 'recall_information'); + + // Use in generation with specific tools + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: 'Read the README file and remember its contents', + tools: [...fsTools, ...memoryTools], + resources: resources, + }); + + // Tool responses are automatically processed + // Raw responses can be enabled with rawToolResponses: true + ``` + + + ```go + // Get tools from specific server + timeTool, err := manager.GetTool(ctx, g, "time", "get_current_time") + if err != nil { + log.Fatal(err) + } + + // Get prompt from specific server + prompt, err := manager.GetPrompt(ctx, g, "time", "time_prompt", nil) + if err != nil { + log.Fatal(err) + } + + // Get all tools from all servers + allTools, err := manager.GetActiveTools(ctx, g) + if err != nil { + log.Fatal(err) + } + + // Use in generation + resp, err := genkit.Generate(ctx, g, + ai.WithModel(myModel), + ai.WithPrompt("Use available tools to help me"), + ai.WithTools(allTools...), + ) + if err != nil { + log.Fatal(err) + } + + // Dynamic server management + err = manager.Connect("weather", mcp.MCPClientOptions{ + Name: "weather-server", + Stdio: &mcp.StdioConfig{ + Command: "python", + Args: []string{"weather_server.py"}, + }, + }) + if err != nil { + log.Fatal(err) + } + + // Disconnect when done + err = manager.Disconnect("weather") + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + # Get tools from specific servers + fs_tools = await mcp_plugin.get_tools_from_server("filesystem") + memory_tools = await mcp_plugin.get_tools_from_server("memory") + + # Get all available tools + all_tools = await mcp_plugin.get_active_tools() + + # Get resources + resources = await mcp_plugin.get_active_resources() + + # Get prompt from specific server + prompt = await mcp_plugin.get_prompt("memory", "recall_information") + + # Use in generation + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt="Read files and remember important information", + tools=fs_tools + memory_tools, + resources=resources, + ) + + # Dynamic server management + await mcp_plugin.connect_server("weather", { + "command": "python", + "args": ["weather_server.py"], + }) + + # Disconnect server + await mcp_plugin.disconnect_server("weather") + ``` + + + +## MCP Server Usage + +### Exposing Genkit as MCP Server + + + + ```ts + import { createMcpServer } from '@genkit-ai/mcp'; + import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; + import { genkit, z } from 'genkit'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + // Define tools to expose + ai.defineTool( + { + name: 'add', + description: 'Add two numbers together', + inputSchema: z.object({ + a: z.number(), + b: z.number() + }), + outputSchema: z.number(), + }, + async ({ a, b }) => a + b + ); + + // Define prompts to expose + ai.definePrompt( + { + name: 'greeting', + description: 'Generate a friendly greeting', + input: { + schema: z.object({ + name: z.string().default('friend').optional(), + }), + }, + }, + `Hello {{name}}! 
How can I help you today?` + ); + + // Define resources to expose + ai.defineResource( + { + name: 'system-info', + uri: 'system://info', + }, + async () => ({ + content: [{ + text: `System: ${process.platform}, Node: ${process.version}`, + }], + }) + ); + + // Create and start MCP server + const server = createMcpServer(ai, { + name: 'genkit-calculator', + version: '1.0.0', + }); + + server.setup().then(async () => { + await server.start(); + const transport = new StdioServerTransport(); + await server.server?.connect(transport); + }); + ``` + + + ```go + package main + + import ( + "context" + "log" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/mcp" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx) + if err != nil { + log.Fatal(err) + } + + // Define tools to expose + addTool := genkit.DefineTool(g, "add", "Add two numbers", + func(ctx context.Context, input struct{ A, B int }) (int, error) { + return input.A + input.B, nil + }) + + multiplyTool := genkit.DefineTool(g, "multiply", "Multiply two numbers", + func(ctx context.Context, input struct{ A, B int }) (int, error) { + return input.A * input.B, nil + }) + + // Create MCP server with all tools + server := mcp.NewMCPServer(g, mcp.MCPServerOptions{ + Name: "genkit-calculator", + Version: "1.0.0", + }) + + // Or create server with specific tools only + specificServer := mcp.NewMCPServer(g, mcp.MCPServerOptions{ + Name: "genkit-math", + Version: "1.0.0", + Tools: []ai.Tool{addTool, multiplyTool}, + }) + + // Start the MCP server + log.Println("Starting MCP server...") + if err := server.ServeStdio(ctx); err != nil { + log.Fatal(err) + } + } + ``` + + + ```python + import asyncio + from genkit.ai import Genkit + from genkit.plugins.mcp import MCPServer + + async def main(): + ai = Genkit() + + # Define tools to expose + @ai.define_tool( + name="add", + description="Add two numbers together", + input_schema={ + "type": "object", + "properties": { + "a": {"type": "number"}, + "b": {"type": "number"}, + }, + "required": ["a", "b"], + }, + ) + async def add_tool(a: float, b: float) -> float: + return a + b + + @ai.define_tool( + name="multiply", + description="Multiply two numbers", + input_schema={ + "type": "object", + "properties": { + "a": {"type": "number"}, + "b": {"type": "number"}, + }, + "required": ["a", "b"], + }, + ) + async def multiply_tool(a: float, b: float) -> float: + return a * b + + # Create MCP server + server = MCPServer( + ai=ai, + name="genkit-calculator", + version="1.0.0", + ) + + # Start the server + print("Starting MCP server...") + await server.serve_stdio() + + if __name__ == "__main__": + asyncio.run(main()) + ``` + + + +## Advanced Configuration + +### Transport Options + + + + ```ts + // Stdio transport (default) + const mcpHost = createMcpHost({ + mcpServers: { + local: { + command: 'node', + args: ['server.js'], + env: { DEBUG: '1' }, + cwd: '/path/to/server', + }, + }, + }); + + // HTTP transport + const httpHost = createMcpHost({ + mcpServers: { + remote: { + url: 'https://api.example.com/mcp', + headers: { + 'Authorization': 'Bearer token', + 'X-API-Key': 'key', + }, + requestInit: { + timeout: 30000, + }, + }, + }, + }); + + // Custom transport + const customHost = createMcpHost({ + mcpServers: { + custom: { + transport: myCustomTransport, + }, + }, + }); + ``` + + + ```go + // Stdio transport + client, err := mcp.NewGenkitMCPClient(mcp.MCPClientOptions{ + Name: "stdio-server", + Stdio: 
&mcp.StdioConfig{ + Command: "python", + Args: []string{"server.py"}, + Env: []string{"DEBUG=1", "API_KEY=secret"}, + }, + }) + + // SSE transport + sseClient, err := mcp.NewGenkitMCPClient(mcp.MCPClientOptions{ + Name: "sse-server", + SSE: &mcp.SSEConfig{ + BaseURL: "http://localhost:3000/sse", + }, + }) + + // Disabled client (can be enabled later) + disabledClient, err := mcp.NewGenkitMCPClient(mcp.MCPClientOptions{ + Name: "optional-server", + Disabled: true, + Stdio: &mcp.StdioConfig{ + Command: "optional-server", + }, + }) + ``` + + + ```python + # Stdio transport + mcp_plugin = MCPPlugin( + servers={ + "local": { + "command": "python", + "args": ["server.py"], + "env": {"DEBUG": "1", "API_KEY": "secret"}, + "cwd": "/path/to/server", + }, + }, + ) + + # HTTP transport + http_plugin = MCPPlugin( + servers={ + "remote": { + "url": "https://api.example.com/mcp", + "headers": { + "Authorization": "Bearer token", + "X-API-Key": "key", + }, + "timeout": 30, + }, + }, + ) + + # Disabled server + disabled_plugin = MCPPlugin( + servers={ + "optional": { + "command": "optional-server", + "disabled": True, + }, + }, + ) + ``` + + + +### Error Handling and Lifecycle Management + + + + ```ts + const mcpHost = createMcpHost({ + name: 'robust-mcp-client', + mcpServers: { + fs: { + command: 'npx', + args: ['-y', '@modelcontextprotocol/server-filesystem', process.cwd()], + }, + }, + }); + + try { + // Wait for connections to be established + await mcpHost.ready(); + + // Check server status + const activeServers = await mcpHost.getActiveServers(); + console.log('Active servers:', activeServers); + + // Use tools with error handling + const tools = await mcpHost.getActiveTools(ai); + + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: 'List files safely', + tools: tools, + }); + + } catch (error) { + console.error('MCP operation failed:', error); + } finally { + // Always clean up + await mcpHost.close(); + } + ``` + + + ```go + manager, err := mcp.NewMCPManager(mcp.MCPManagerOptions{ + Name: "robust-app", + MCPServers: []mcp.MCPServerConfig{ + { + Name: "filesystem", + Config: mcp.MCPClientOptions{ + Name: "fs-server", + Stdio: &mcp.StdioConfig{ + Command: "npx", + Args: []string{"-y", "@modelcontextprotocol/server-filesystem", "."}, + }, + }, + }, + }, + }) + if err != nil { + log.Fatal(err) + } + + // Graceful shutdown + defer func() { + if err := manager.Close(); err != nil { + log.Printf("Error closing MCP manager: %v", err) + } + }() + + // Check server health + tools, err := manager.GetActiveTools(ctx, g) + if err != nil { + log.Printf("Failed to get tools: %v", err) + return + } + + // Use tools with error handling + resp, err := genkit.Generate(ctx, g, + ai.WithModel(myModel), + ai.WithPrompt("List files safely"), + ai.WithTools(tools...), + ) + if err != nil { + log.Printf("Generation failed: %v", err) + return + } + ``` + + + ```python + async def robust_mcp_usage(): + mcp_plugin = MCPPlugin( + name="robust-client", + servers={ + "filesystem": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "."], + }, + }, + ) + + try: + # Wait for connections + await mcp_plugin.ready() + + # Check server status + active_servers = await mcp_plugin.get_active_servers() + print(f"Active servers: {active_servers}") + + # Get tools with error handling + tools = await mcp_plugin.get_active_tools() + + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt="List files safely", + tools=tools, + ) + + except Exception 
as error: + print(f"MCP operation failed: {error}") + finally: + # Clean up connections + await mcp_plugin.close() + ``` + + + +## Testing and Development + +### Testing Your MCP Server + + + + ```bash + # Test with MCP Inspector + npx @modelcontextprotocol/inspector node dist/server.js + + # Test with custom client + node test-client.js + ``` + + ```ts + // test-client.js + import { createMcpClient } from '@genkit-ai/mcp'; + + const client = createMcpClient({ + name: 'test-client', + mcpServer: { + command: 'node', + args: ['dist/server.js'], + }, + }); + + await client.ready(); + + // Test tools + const tools = await client.getActiveTools(); + console.log('Available tools:', tools.map(t => t.name)); + + // Test prompts + const prompts = await client.getActivePrompts(); + console.log('Available prompts:', prompts.map(p => p.name)); + + await client.disable(); + ``` + + + ```bash + # Build your server + go build -o server main.go + + # Test with MCP Inspector + npx @modelcontextprotocol/inspector ./server + + # Test with custom client + go run test-client.go + ``` + + ```go + // test-client.go + package main + + import ( + "context" + "log" + "github.com/firebase/genkit/go/plugins/mcp" + ) + + func main() { + ctx := context.Background() + + client, err := mcp.NewGenkitMCPClient(mcp.MCPClientOptions{ + Name: "test-client", + Stdio: &mcp.StdioConfig{ + Command: "./server", + }, + }) + if err != nil { + log.Fatal(err) + } + + // Test server capabilities + tools, err := client.GetActiveTools(ctx, nil) + if err != nil { + log.Fatal(err) + } + + log.Printf("Available tools: %d", len(tools)) + + // Clean up + client.Close() + } + ``` + + + ```bash + # Test with MCP Inspector + npx @modelcontextprotocol/inspector python server.py + + # Test with custom client + python test_client.py + ``` + + ```python + # test_client.py + import asyncio + from genkit.plugins.mcp import MCPClient + + async def test_server(): + client = MCPClient( + name="test-client", + server_config={ + "command": "python", + "args": ["server.py"], + }, + ) + + await client.connect() + + # Test server capabilities + tools = await client.get_active_tools() + print(f"Available tools: {len(tools)}") + + prompts = await client.get_active_prompts() + print(f"Available prompts: {len(prompts)}") + + await client.disconnect() + + if __name__ == "__main__": + asyncio.run(test_server()) + ``` + + + +## Best Practices + +### Security Considerations + +1. **Validate MCP server sources**: Only connect to trusted MCP servers +2. **Sanitize inputs**: Validate all data passed to MCP tools +3. **Limit permissions**: Run MCP servers with minimal required permissions +4. **Monitor resource usage**: Track memory and CPU usage of MCP processes + +### Performance Optimization + +1. **Connection pooling**: Reuse MCP connections when possible +2. **Lazy loading**: Connect to servers only when needed +3. **Timeout configuration**: Set appropriate timeouts for MCP operations +4. **Resource cleanup**: Always close connections and clean up resources + +### Error Handling + +1. **Graceful degradation**: Handle MCP server failures gracefully +2. **Retry logic**: Implement retry mechanisms for transient failures +3. **Logging**: Log MCP operations for debugging and monitoring +4. 
**Fallback strategies**: Provide alternatives when MCP tools are unavailable + +## Next Steps + +- Learn about [tool calling](/unified-docs/tool-calling) to understand how MCP tools integrate with Genkit +- Explore [creating flows](/unified-docs/creating-flows) to build workflows that leverage MCP capabilities +- See the [MCP Server guide](/unified-docs/mcp-server) for creating your own MCP servers +- Check out the [official MCP documentation](https://modelcontextprotocol.io) for more details on the protocol diff --git a/src/content/docs/unified-docs/plugins/ollama.mdx b/src/content/docs/unified-docs/plugins/ollama.mdx new file mode 100644 index 00000000..f3458f89 --- /dev/null +++ b/src/content/docs/unified-docs/plugins/ollama.mdx @@ -0,0 +1,574 @@ +--- +title: Ollama plugin +description: Learn how to use Ollama for local AI models with Genkit across JavaScript, Go, and Python, including setup, configuration, and usage for both text generation and embeddings. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +The Ollama plugin provides interfaces to local LLMs supported by [Ollama](https://ollama.com/), enabling you to run powerful AI models locally without requiring cloud API keys or internet connectivity. + +## Prerequisites + +Before using the Ollama plugin, you need to install and run the Ollama server locally: + +1. **Download and install Ollama** from [ollama.com/download](https://ollama.com/download) +2. **Download models** using the Ollama CLI: + ```bash + ollama pull gemma + ollama pull llama2 + ollama pull nomic-embed-text # for embeddings + ``` +3. **Start the Ollama server** (usually starts automatically after installation) + +## Installation and Setup + + + + Install the Ollama plugin: + + ```bash + npm install genkitx-ollama + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { ollama } from 'genkitx-ollama'; + + const ai = genkit({ + plugins: [ + ollama({ + models: [ + { + name: 'gemma', + type: 'generate', // 'chat' | 'generate' | undefined + }, + { + name: 'llama2', + type: 'chat', + }, + ], + serverAddress: 'http://127.0.0.1:11434', // default local address + }), + ], + }); + ``` + + + The Ollama plugin is available through the Ollama package: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/ollama" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&ollama.Ollama{ + ServerAddress: "http://127.0.0.1:11434", + Models: []ollama.ModelConfig{ + {Name: "gemma", Type: "generate"}, + {Name: "llama2", Type: "chat"}, + }, + }), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + Install the Ollama plugin: + + ```bash + pip install genkit-plugin-ollama + ``` + + Configure the plugin when initializing Genkit: + + ```python + from genkit.ai import Genkit + from genkit.plugins.ollama import Ollama + + ai = Genkit( + plugins=[Ollama( + server_address="http://127.0.0.1:11434", + models=[ + {"name": "gemma", "type": "generate"}, + {"name": "llama2", "type": "chat"}, + ], + )], + ) + ``` + + + +## Basic Usage + + + + Use Ollama models for text generation: + + ```ts + // Basic text generation + const llmResponse = await ai.generate({ + model: 'ollama/gemma', + prompt: 'Tell me a joke about programming.', + }); + + console.log(llmResponse.text); + + // Chat-style interaction + const chatResponse = await ai.generate({ + model: 'ollama/llama2', + prompt: 'What are the benefits of using 
local AI models?',
      config: {
        temperature: 0.7,
        maxTokens: 500,
      },
    });

    // Using in a flow
    export const localAIFlow = ai.defineFlow(
      {
        name: 'localAIFlow',
        inputSchema: z.object({ question: z.string() }),
        outputSchema: z.object({ answer: z.string() }),
      },
      async ({ question }) => {
        const response = await ai.generate({
          model: 'ollama/gemma',
          prompt: `Answer this question: ${question}`,
        });
        return { answer: response.text };
      },
    );
    ```
  </TabItem>
  <TabItem label="Go">
    Use Ollama models with the generation API:

    ```go
    import (
      "context"
      "fmt"
      "log"

      "github.com/firebase/genkit/go/ai"
      "github.com/firebase/genkit/go/genkit"
      "github.com/firebase/genkit/go/plugins/ollama"
    )

    func main() {
      ctx := context.Background()
      g, err := genkit.Init(ctx,
        genkit.WithPlugins(&ollama.Ollama{
          ServerAddress: "http://127.0.0.1:11434",
          Models: []ollama.ModelConfig{
            {Name: "gemma", Type: "generate"},
          },
        }),
        genkit.WithDefaultModel("ollama/gemma"),
      )
      if err != nil {
        log.Fatal(err)
      }

      // Generate content
      resp, err := genkit.Generate(ctx, g,
        ai.WithPrompt("Tell me a joke about programming."),
      )
      if err != nil {
        log.Fatal(err)
      }

      fmt.Println(resp.Text())
    }
    ```
  </TabItem>
  <TabItem label="Python">
    Use Ollama models with the generation API:

    ```python
    from genkit.ai import Genkit
    from genkit.plugins.ollama import Ollama, ollama_name

    ai = Genkit(
        plugins=[Ollama(
            server_address="http://127.0.0.1:11434",
            models=[
                {"name": "gemma", "type": "generate"},
                {"name": "llama2", "type": "chat"},
            ],
        )],
        model=ollama_name('gemma'),
    )

    # Generate content
    response = await ai.generate('Tell me a joke about programming.')
    print(response.text)

    # With configuration
    response = await ai.generate(
        prompt='What are the benefits of using local AI models?',
        model=ollama_name('llama2'),
        config={
            'temperature': 0.7,
            'max_tokens': 500,
        }
    )
    ```
  </TabItem>
</Tabs>

## Embeddings

<Tabs syncKey="language">
  <TabItem label="JavaScript">
    Use Ollama for text embeddings:

    ```ts
    const ai = genkit({
      plugins: [
        ollama({
          serverAddress: 'http://localhost:11434',
          embedders: [
            { name: 'nomic-embed-text', dimensions: 768 },
            { name: 'all-minilm', dimensions: 384 },
          ],
        }),
      ],
    });

    // Generate embeddings
    const embeddings = await ai.embed({
      embedder: 'ollama/nomic-embed-text',
      content: 'Some text to embed!',
    });

    console.log('Embedding dimensions:', embeddings.length);

    // Use with vector databases: a separate setup that pairs the Ollama
    // embedder with the Chroma plugin
    const aiWithChroma = genkit({
      plugins: [
        ollama({
          embedders: [{ name: 'nomic-embed-text', dimensions: 768 }],
        }),
        chroma([
          {
            embedder: 'ollama/nomic-embed-text',
            collectionName: 'local-embeddings',
          },
        ]),
      ],
    });

    // Embedding flow
    export const embedFlow = ai.defineFlow(
      {
        name: 'embedFlow',
        inputSchema: z.object({ text: z.string() }),
        outputSchema: z.object({ embedding: z.array(z.number()) }),
      },
      async ({ text }) => {
        const embedding = await ai.embed({
          embedder: 'ollama/nomic-embed-text',
          content: text,
        });
        return { embedding };
      },
    );
    ```
  </TabItem>
  <TabItem label="Go">
    Generate embeddings using Ollama models:

    ```go
    g, err := genkit.Init(ctx,
      genkit.WithPlugins(&ollama.Ollama{
        ServerAddress: "http://127.0.0.1:11434",
        Embedders: []ollama.EmbedderConfig{
          {Name: "nomic-embed-text", Dimensions: 768},
        },
      }),
    )

    // Generate embeddings
    embeddings, err := genkit.Embed(ctx, g,
      ai.WithEmbedder("ollama/nomic-embed-text"),
      ai.WithEmbedContent("Some text to embed!"),
    )
    if err != nil {
      log.Fatal(err)
    }

    fmt.Printf("Generated %d-dimensional embedding\n", 
len(embeddings)) + ``` + + + Generate embeddings using Ollama models: + + ```python + ai = Genkit( + plugins=[Ollama( + server_address="http://127.0.0.1:11434", + embedders=[ + {"name": "nomic-embed-text", "dimensions": 768}, + {"name": "all-minilm", "dimensions": 384}, + ], + )], + ) + + # Generate embeddings + embeddings = await ai.embed( + embedder=ollama_name('nomic-embed-text'), + content='Some text to embed!', + ) + + print(f"Generated {len(embeddings)}-dimensional embedding") + ``` + + + +## Authentication and Remote Deployments + + + + For remote Ollama deployments that require authentication: + + ### Static Headers + + ```ts + const ai = genkit({ + plugins: [ + ollama({ + models: [{ name: 'gemma' }], + serverAddress: 'https://my-ollama-deployment.com', + requestHeaders: { + 'api-key': 'your-api-key-here', + 'authorization': 'Bearer your-token', + }, + }), + ], + }); + ``` + + ### Dynamic Headers + + ```ts + import { GoogleAuth } from 'google-auth-library'; + + const ai = genkit({ + plugins: [ + ollama({ + models: [{ name: 'gemma' }], + serverAddress: 'https://my-ollama-deployment.com', + requestHeaders: async (params) => { + const headers = await fetchWithAuthHeader(params.serverAddress); + return { Authorization: headers['Authorization'] }; + }, + }), + ], + }); + + // Function to fetch auth headers + async function fetchWithAuthHeader(url: string) { + const auth = new GoogleAuth(); + const client = await auth.getIdTokenClient(url); + const headers = await client.getRequestHeaders(url); + return headers; + } + ``` + + ### Environment-based Configuration + + ```ts + const ollamaConfig = process.env.NODE_ENV === 'production' + ? { + models: [{ name: 'gemma' }], + serverAddress: 'https://my-ollama-deployment.com', + requestHeaders: { 'api-key': process.env.OLLAMA_API_KEY }, + } + : { + models: [{ name: 'gemma' }], + serverAddress: 'http://127.0.0.1:11434', + }; + + const ai = genkit({ + plugins: [ollama(ollamaConfig)], + }); + ``` + + + For remote Ollama deployments with authentication: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins(&ollama.Ollama{ + ServerAddress: "https://my-ollama-deployment.com", + Models: []ollama.ModelConfig{ + {Name: "gemma", Type: "generate"}, + }, + RequestHeaders: map[string]string{ + "api-key": os.Getenv("OLLAMA_API_KEY"), + "authorization": "Bearer " + os.Getenv("OLLAMA_TOKEN"), + }, + }), + ) + ``` + + + For remote Ollama deployments with authentication: + + ```python + import os + + ai = Genkit( + plugins=[Ollama( + server_address="https://my-ollama-deployment.com", + models=[{"name": "gemma", "type": "generate"}], + request_headers={ + "api-key": os.getenv("OLLAMA_API_KEY"), + "authorization": f"Bearer {os.getenv('OLLAMA_TOKEN')}", + }, + )], + ) + ``` + + + +## Model Configuration + +### Model Types + + + + Configure different model types for different use cases: + + ```ts + const ai = genkit({ + plugins: [ + ollama({ + models: [ + // Chat models for conversational AI + { name: 'llama2', type: 'chat' }, + { name: 'mistral', type: 'chat' }, + + // Generate models for text completion + { name: 'gemma', type: 'generate' }, + { name: 'codellama', type: 'generate' }, + + // Auto-detect type (default) + { name: 'phi' }, // type will be auto-detected + ], + serverAddress: 'http://127.0.0.1:11434', + }), + ], + }); + ``` + + + Configure different model types: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins(&ollama.Ollama{ + ServerAddress: "http://127.0.0.1:11434", + Models: []ollama.ModelConfig{ + {Name: "llama2", Type: "chat"}, + 
{Name: "mistral", Type: "chat"}, + {Name: "gemma", Type: "generate"}, + {Name: "codellama", Type: "generate"}, + {Name: "phi"}, // auto-detect type + }, + }), + ) + ``` + + + Configure different model types: + + ```python + ai = Genkit( + plugins=[Ollama( + server_address="http://127.0.0.1:11434", + models=[ + {"name": "llama2", "type": "chat"}, + {"name": "mistral", "type": "chat"}, + {"name": "gemma", "type": "generate"}, + {"name": "codellama", "type": "generate"}, + {"name": "phi"}, # auto-detect type + ], + )], + ) + ``` + + + +## Popular Models + +Here are some popular models you can use with Ollama: + +### Text Generation Models +- **Llama 2**: `llama2` (7B, 13B, 70B variants) +- **Gemma**: `gemma` (2B, 7B variants) +- **Mistral**: `mistral` (7B) +- **Code Llama**: `codellama` (7B, 13B, 34B variants) +- **Phi**: `phi` (3B) +- **Qwen**: `qwen` (various sizes) + +### Embedding Models +- **Nomic Embed Text**: `nomic-embed-text` (768 dimensions) +- **All-MiniLM**: `all-minilm` (384 dimensions) +- **BGE**: `bge-large` (1024 dimensions) + +### Specialized Models +- **Llava**: `llava` (multimodal - text and images) +- **Dolphin**: `dolphin-mistral` (uncensored variant) +- **Orca**: `orca-mini` (smaller, efficient model) + +## Configuration Options + +### Generation Parameters +- `temperature`: Randomness (0.0-2.0) +- `top_p`: Nucleus sampling (0.0-1.0) +- `top_k`: Top-k sampling +- `repeat_penalty`: Repetition penalty +- `seed`: Random seed for reproducible outputs +- `num_predict`: Maximum tokens to generate +- `stop`: Stop sequences + +### Performance Tuning +- `num_ctx`: Context window size +- `num_batch`: Batch size for processing +- `num_gpu`: Number of GPU layers to use +- `num_thread`: Number of CPU threads + +## Advantages of Local Models + +### Privacy and Security +- **Data stays local**: No data sent to external APIs +- **No API keys required**: No risk of key exposure +- **Offline capability**: Works without internet connection +- **Full control**: Complete control over model behavior + +### Cost and Performance +- **No usage fees**: No per-token or per-request charges +- **Predictable costs**: Only hardware and electricity costs +- **Low latency**: No network round-trips +- **Customizable**: Fine-tune models for specific use cases + +### Development Benefits +- **Rapid prototyping**: No API rate limits +- **Consistent availability**: No service outages +- **Version control**: Pin specific model versions +- **Experimentation**: Try different models easily + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [RAG](/unified-docs/rag) to implement retrieval-augmented generation with local embeddings +- See [creating flows](/unified-docs/creating-flows) to build structured AI workflows with local models +- Check out [tool calling](/unified-docs/tool-calling) to add interactive capabilities to your local AI applications diff --git a/src/content/docs/unified-docs/plugins/openai.mdx b/src/content/docs/unified-docs/plugins/openai.mdx new file mode 100644 index 00000000..a090daea --- /dev/null +++ b/src/content/docs/unified-docs/plugins/openai.mdx @@ -0,0 +1,652 @@ +--- +title: OpenAI plugin +description: Learn how to use OpenAI models with Genkit across JavaScript, Go, and Python, including GPT models, DALL-E image generation, Whisper transcription, and text-to-speech capabilities. 
+--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +The OpenAI plugin provides access to OpenAI's powerful AI models, including GPT for text generation, DALL-E for image generation, Whisper for speech transcription, and text-to-speech models. + +## Installation and Setup + + + + Install the OpenAI plugin: + + ```bash + npm install @genkit-ai/compat-oai + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { openAI } from '@genkit-ai/compat-oai/openai'; + + const ai = genkit({ + plugins: [openAI()], + }); + ``` + + :::note + The OpenAI plugin is built on top of the `openAICompatible` plugin and is pre-configured for OpenAI's API endpoints. + ::: + + + The OpenAI plugin is available through the OpenAI-compatible plugin: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/openai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&openai.OpenAI{ + APIKey: "your-api-key", // or use environment variable + }), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + Install the OpenAI plugin: + + ```bash + pip install genkit-plugin-openai + ``` + + Configure the plugin when initializing Genkit: + + ```python + from genkit.ai import Genkit + from genkit.plugins.openai import OpenAI + + ai = Genkit( + plugins=[OpenAI()], + ) + ``` + + + +## API Key Configuration + +The plugin requires an API key for the OpenAI API, which you can get from the [OpenAI Platform](https://platform.openai.com/api-keys). + + + + Configure your API key by doing one of the following: + + - Set the `OPENAI_API_KEY` environment variable: + ```bash + export OPENAI_API_KEY=your_api_key_here + ``` + + - Specify the API key when initializing the plugin: + ```ts + openAI({ apiKey: yourKey }); + ``` + + :::caution + Don't embed your API key directly in code! Use environment variables or a service like Google Cloud Secret Manager. + ::: + + + Set the `OPENAI_API_KEY` environment variable: + + ```bash + export OPENAI_API_KEY=your_api_key_here + ``` + + Or specify it in the plugin configuration: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins(&openai.OpenAI{ + APIKey: os.Getenv("OPENAI_API_KEY"), + }), + ) + ``` + + + Set the `OPENAI_API_KEY` environment variable: + + ```bash + export OPENAI_API_KEY=your_api_key_here + ``` + + The plugin will automatically use this environment variable. 
+ + + +## Text Generation + + + + Use OpenAI's GPT models for text generation: + + ```ts + import { openAI } from '@genkit-ai/compat-oai/openai'; + + const ai = genkit({ + plugins: [openAI()], + }); + + // Basic text generation + const llmResponse = await ai.generate({ + prompt: 'Tell me a joke about programming', + model: openAI.model('gpt-4o'), + }); + + // With configuration + const configuredResponse = await ai.generate({ + prompt: 'Write a creative story about AI', + model: openAI.model('gpt-4o'), + config: { + temperature: 0.7, + maxTokens: 1000, + topP: 0.9, + }, + }); + + // Using in a flow + export const jokeFlow = ai.defineFlow( + { + name: 'jokeFlow', + inputSchema: z.object({ subject: z.string() }), + outputSchema: z.object({ joke: z.string() }), + }, + async ({ subject }) => { + const llmResponse = await ai.generate({ + prompt: `tell me a joke about ${subject}`, + model: openAI.model('gpt-4o'), + }); + return { joke: llmResponse.text }; + }, + ); + ``` + + + Use OpenAI models with the generation API: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/openai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&openai.OpenAI{}), + genkit.WithDefaultModel("openai/gpt-4o"), + ) + if err != nil { + log.Fatal(err) + } + + // Generate content + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Tell me a joke about programming"), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(resp.Text()) + } + ``` + + + Use OpenAI models with the generation API: + + ```python + from genkit.ai import Genkit + from genkit.plugins.openai import OpenAI, openai_name + + ai = Genkit( + plugins=[OpenAI()], + model=openai_name('gpt-4o'), + ) + + # Generate content + response = await ai.generate('Tell me a joke about programming') + print(response.text) + + # With configuration + response = await ai.generate( + prompt='Write a creative story about AI', + model=openai_name('gpt-4o'), + config={ + 'temperature': 0.7, + 'max_tokens': 1000, + 'top_p': 0.9, + } + ) + ``` + + + +## Image Generation + + + + Generate images using DALL-E models: + + ```ts + // Basic image generation + const imageResponse = await ai.generate({ + model: openAI.model('dall-e-3'), + prompt: 'A photorealistic image of a cat programming a computer.', + config: { + size: '1024x1024', + style: 'vivid', + quality: 'hd', + }, + }); + + const imageUrl = imageResponse.media()?.url; + + // DALL-E 2 for faster generation + const quickImage = await ai.generate({ + model: openAI.model('dall-e-2'), + prompt: 'A simple cartoon of a robot', + config: { + size: '512x512', + n: 2, // Generate 2 variations + }, + }); + ``` + + + Image generation requires custom implementation using the OpenAI API: + + ```go + // Image generation requires custom implementation + // Use the OpenAI Go SDK directly for DALL-E functionality + ``` + + + Image generation requires custom implementation using the OpenAI API: + + ```python + # Image generation requires custom implementation + # Use the OpenAI Python SDK directly for DALL-E functionality + ``` + + + +## Text Embeddings + + + + Generate text embeddings for vector search and similarity: + + ```ts + // Generate embeddings + const embedding = await ai.embed({ + embedder: openAI.embedder('text-embedding-ada-002'), + content: 'This is a sample text for embedding', + }); + + // Using in a flow + export const embedFlow = ai.defineFlow( + { + name: 'embedFlow', 
+ inputSchema: z.object({ text: z.string() }), + outputSchema: z.object({ embedding: z.string() }), + }, + async ({ text }) => { + const embedding = await ai.embed({ + embedder: openAI.embedder('text-embedding-ada-002'), + content: text, + }); + + return { embedding: JSON.stringify(embedding) }; + }, + ); + + // Use with vector databases + const ai = genkit({ + plugins: [ + openAI(), + chroma([ + { + embedder: openAI.embedder('text-embedding-ada-002'), + collectionName: 'my-collection', + }, + ]), + ], + }); + ``` + + + Generate embeddings using OpenAI models: + + ```go + // Generate embeddings + embeddings, err := genkit.Embed(ctx, g, + ai.WithEmbedder("openai/text-embedding-ada-002"), + ai.WithEmbedContent("This is a sample text for embedding"), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("Generated %d-dimensional embedding\n", len(embeddings)) + ``` + + + Generate embeddings using OpenAI models: + + ```python + # Generate embeddings + embeddings = await ai.embed( + embedder=openai_name('text-embedding-ada-002'), + content='This is a sample text for embedding', + ) + + print(f"Generated {len(embeddings)}-dimensional embedding") + ``` + + + +## Audio Processing + +### Speech-to-Text (Whisper) + + + + Transcribe audio files using Whisper: + + ```ts + import * as fs from 'fs'; + + const whisper = openAI.model('whisper-1'); + const audioFile = fs.readFileSync('path/to/your/audio.mp3'); + + const transcription = await ai.generate({ + model: whisper, + prompt: [ + { + media: { + contentType: 'audio/mp3', + url: `data:audio/mp3;base64,${audioFile.toString('base64')}`, + }, + }, + ], + config: { + language: 'en', // Optional: specify language + temperature: 0, // For more deterministic results + }, + }); + + console.log('Transcription:', transcription.text()); + + // With additional context + const contextualTranscription = await ai.generate({ + model: whisper, + prompt: [ + { text: 'This is a recording of a technical meeting about AI.' }, + { + media: { + contentType: 'audio/wav', + url: `data:audio/wav;base64,${audioFile.toString('base64')}`, + }, + }, + ], + }); + ``` + + + Audio transcription requires custom implementation using the OpenAI API: + + ```go + // Audio transcription requires custom implementation + // Use the OpenAI Go SDK directly for Whisper functionality + ``` + + + Audio transcription requires custom implementation using the OpenAI API: + + ```python + # Audio transcription requires custom implementation + # Use the OpenAI Python SDK directly for Whisper functionality + ``` + + + +### Text-to-Speech + + + + Generate speech from text: + + ```ts + import * as fs from 'fs'; + + const tts = openAI.model('tts-1'); + const speechResponse = await ai.generate({ + model: tts, + prompt: 'Hello, world! 
This is a test of text-to-speech.', + config: { + voice: 'alloy', // Options: alloy, echo, fable, onyx, nova, shimmer + response_format: 'mp3', // Options: mp3, opus, aac, flac + speed: 1.0, // 0.25 to 4.0 + }, + }); + + const audioData = speechResponse.media(); + if (audioData) { + fs.writeFileSync('output.mp3', Buffer.from(audioData.url.split(',')[1], 'base64')); + } + + // High-quality TTS + const hqSpeech = await ai.generate({ + model: openAI.model('tts-1-hd'), + prompt: 'This is high-quality text-to-speech.', + config: { + voice: 'nova', + response_format: 'wav', + }, + }); + ``` + + + Text-to-speech requires custom implementation using the OpenAI API: + + ```go + // Text-to-speech requires custom implementation + // Use the OpenAI Go SDK directly for TTS functionality + ``` + + + Text-to-speech requires custom implementation using the OpenAI API: + + ```python + # Text-to-speech requires custom implementation + # Use the OpenAI Python SDK directly for TTS functionality + ``` + + + +## Advanced Features + +### Web Search Integration + + + + Some OpenAI models support web search capabilities: + + ```ts + const llmResponse = await ai.generate({ + prompt: 'What was a positive news story from today?', + model: openAI.model('gpt-4o-search-preview'), + config: { + web_search_options: { + max_results: 5, + }, + }, + }); + ``` + + + Web search integration requires custom implementation: + + ```go + // Web search requires custom implementation + // Use the OpenAI API directly for search-enabled models + ``` + + + Web search integration requires custom implementation: + + ```python + # Web search requires custom implementation + # Use the OpenAI API directly for search-enabled models + ``` + + + +### Function Calling + + + + OpenAI models support function calling for tool integration: + + ```ts + const weatherTool = ai.defineTool({ + name: 'getWeather', + description: 'Get the current weather for a location', + inputSchema: z.object({ + location: z.string().describe('The city and state'), + }), + outputSchema: z.object({ + temperature: z.number(), + condition: z.string(), + }), + }, async ({ location }) => { + // Implementation here + return { temperature: 72, condition: 'sunny' }; + }); + + const response = await ai.generate({ + prompt: 'What\'s the weather like in San Francisco?', + model: openAI.model('gpt-4o'), + tools: [weatherTool], + }); + ``` + + + Function calling is supported through Genkit's tool system: + + ```go + // Define tools and use with OpenAI models + // See tool calling documentation for implementation details + ``` + + + Function calling is supported through Genkit's tool system: + + ```python + # Define tools and use with OpenAI models + # See tool calling documentation for implementation details + ``` + + + +### Passthrough Configuration + + + + Access new models and features without updating Genkit: + + ```ts + const llmResponse = await ai.generate({ + prompt: 'Tell me a cool story', + model: openAI.model('gpt-4-new'), // hypothetical new model + config: { + seed: 123, + new_feature_parameter: 'value', // hypothetical config for new model + logprobs: true, + top_logprobs: 5, + }, + }); + ``` + + Genkit passes this config as-is to the OpenAI API, giving you access to new model features. 
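    Because the config is passed through without validation, a typo or an option the target model doesn't support surfaces as a runtime API error rather than a type error. A small sketch of guarding experimental config, using options from the configuration list later on this page:

    ```ts
    try {
      const { text } = await ai.generate({
        prompt: 'Tell me a cool story',
        model: openAI.model('gpt-4o'),
        config: {
          seed: 123,
          logprobs: true,
        },
      });
      console.log(text);
    } catch (err) {
      // The OpenAI API rejects parameters it does not recognize at request time
      console.error('Generation failed:', err);
    }
    ```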
+ + + Passthrough configuration allows access to new OpenAI features: + + ```go + // Custom configuration can be passed through to the OpenAI API + // See OpenAI Go SDK documentation for available options + ``` + + + Passthrough configuration allows access to new OpenAI features: + + ```python + # Custom configuration can be passed through to the OpenAI API + # See OpenAI Python SDK documentation for available options + ``` + + + +## Available Models + +### Text Generation +- **GPT-4 family**: `gpt-4o`, `gpt-4o-mini`, `gpt-4-turbo`, `gpt-4` +- **GPT-3.5**: `gpt-3.5-turbo` +- **Search-enabled**: `gpt-4o-search-preview` + +### Image Generation +- **DALL-E 3**: `dall-e-3` (high quality, 1024x1024, 1024x1792, 1792x1024) +- **DALL-E 2**: `dall-e-2` (faster, 256x256, 512x512, 1024x1024) + +### Embeddings +- **Text embeddings**: `text-embedding-ada-002`, `text-embedding-3-small`, `text-embedding-3-large` + +### Audio +- **Speech-to-text**: `whisper-1` +- **Text-to-speech**: `tts-1`, `tts-1-hd` + +## Configuration Options + +### Text Generation +- `temperature`: Randomness (0-2) +- `max_tokens`: Maximum response length +- `top_p`: Nucleus sampling (0-1) +- `frequency_penalty`: Reduce repetition (-2 to 2) +- `presence_penalty`: Encourage new topics (-2 to 2) +- `seed`: Deterministic outputs +- `logprobs`: Return log probabilities + +### Image Generation +- `size`: Image dimensions +- `style`: `vivid` or `natural` +- `quality`: `standard` or `hd` +- `n`: Number of images (1-10 for DALL-E 2) + +### Audio +- `voice`: TTS voice selection +- `response_format`: Audio format +- `speed`: Speech rate (0.25-4.0) +- `language`: Whisper language hint +- `temperature`: Whisper randomness + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [tool calling](/unified-docs/tool-calling) to integrate OpenAI's function calling capabilities +- See [creating flows](/unified-docs/creating-flows) to build structured AI workflows +- Check out [RAG](/unified-docs/rag) to implement retrieval-augmented generation with OpenAI embeddings diff --git a/src/content/docs/unified-docs/plugins/vertex-ai.mdx b/src/content/docs/unified-docs/plugins/vertex-ai.mdx new file mode 100644 index 00000000..720f23a3 --- /dev/null +++ b/src/content/docs/unified-docs/plugins/vertex-ai.mdx @@ -0,0 +1,799 @@ +--- +title: Vertex AI plugin +description: Learn how to use Google Cloud Vertex AI with Genkit across JavaScript, Go, and Python, including Gemini models, Imagen image generation, evaluation metrics, Vector Search, and text-to-speech capabilities. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +The Vertex AI plugin provides interfaces to several Google Cloud AI services, offering enterprise-grade AI capabilities with advanced features like grounding, evaluation metrics, and vector search. 
+ +## Available Services + +The Vertex AI plugin provides access to: + +- **Google generative AI models**: Gemini text generation, Imagen image generation, text and multimodal embeddings +- **Evaluation metrics**: BLEU, ROUGE, Fluency, Safety, Groundedness, and Summarization metrics through Vertex AI Rapid Evaluation API +- **Vector Search**: Enterprise-grade vector database service +- **Text-to-Speech**: Advanced speech synthesis capabilities +- **Model Garden**: Access to third-party models like Claude 3, Llama 3.1, and Mistral + +## Installation and Setup + + + + Install the Vertex AI plugin: + + ```bash + npm install @genkit-ai/vertexai + ``` + + If you want to locally run flows that use this plugin, you also need the [Google Cloud CLI tool](https://cloud.google.com/sdk/docs/install) installed. + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { vertexAI } from '@genkit-ai/vertexai'; + + const ai = genkit({ + plugins: [vertexAI({ location: 'us-central1' })], + }); + ``` + + + The Vertex AI plugin is included with the Genkit Go package: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{ + ProjectID: "your-project-id", + Location: "us-central1", + }), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + Vertex AI support in Python is available through the Google Cloud plugin: + + ```bash + pip install genkit-plugin-google-cloud + ``` + + Configure the plugin when initializing Genkit: + + ```python + from genkit.ai import Genkit + from genkit.plugins.google_cloud import GoogleCloud + + ai = Genkit( + plugins=[GoogleCloud( + project_id="your-project-id", + location="us-central1", + )], + ) + ``` + + + +## Authentication and Configuration + + + + The plugin requires: + + 1. **Google Cloud project ID**: Set via `projectId` in configuration or `GCLOUD_PROJECT` environment variable + 2. **API location**: Set via `location` in configuration or `GCLOUD_LOCATION` environment variable + 3. 
**Authentication**: Set up Google Cloud Application Default Credentials + + **Local development:** + ```bash + gcloud auth application-default login --project YOUR_PROJECT_ID + ``` + + **Production environments:** + - Google Cloud environments (Cloud Functions, Cloud Run) are automatically authenticated + - Other environments: see [Application Default Credentials](https://cloud.google.com/docs/authentication/provide-credentials-adc) docs + + **Required IAM role:** Vertex AI User (`roles/aiplatform.user`) + + + Configure authentication and project settings: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{ + ProjectID: "your-project-id", + Location: "us-central1", + }), + ) + ``` + + **Authentication setup:** + ```bash + gcloud auth application-default login --project YOUR_PROJECT_ID + ``` + + **Required IAM role:** Vertex AI User (`roles/aiplatform.user`) + + + Configure the plugin with your project details: + + ```python + ai = Genkit( + plugins=[GoogleCloud( + project_id="your-project-id", + location="us-central1", + )], + ) + ``` + + **Authentication setup:** + ```bash + gcloud auth application-default login --project YOUR_PROJECT_ID + ``` + + **Required IAM role:** Vertex AI User (`roles/aiplatform.user`) + + + +## Basic Usage + + + + Use Vertex AI models for text generation: + + ```ts + import { vertexAI } from '@genkit-ai/vertexai'; + + const ai = genkit({ + plugins: [vertexAI({ location: 'us-central1' })], + }); + + const llmResponse = await ai.generate({ + model: vertexAI.model('gemini-2.5-flash'), + prompt: 'What should I do when I visit Melbourne?', + }); + + // Generate embeddings + const embeddings = await ai.embed({ + embedder: vertexAI.embedder('gemini-embedding-001'), + content: 'How many widgets do you have in stock?', + }); + ``` + + + Use Vertex AI models with the generation API: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{ + ProjectID: "your-project-id", + Location: "us-central1", + }), + genkit.WithDefaultModel("vertexai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatal(err) + } + + // Generate content + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("What should I do when I visit Melbourne?"), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(resp.Text()) + } + ``` + + + Use Vertex AI models with the generation API: + + ```python + from genkit.ai import Genkit + from genkit.plugins.google_cloud import GoogleCloud, vertex_ai_name + + ai = Genkit( + plugins=[GoogleCloud( + project_id="your-project-id", + location="us-central1", + )], + model=vertex_ai_name('gemini-2.5-flash'), + ) + + # Generate content + response = await ai.generate('What should I do when I visit Melbourne?') + print(response.text) + + # Generate embeddings + embeddings = await ai.embed( + embedder=vertex_ai_name('gemini-embedding-001'), + content='How many widgets do you have in stock?', + ) + ``` + + + +## Advanced Features + +### Grounding with Google Search and Private Data + + + + Ground Gemini responses using Google Search or your own data: + + ```ts + // Google Search grounding + await ai.generate({ + model: vertexAI.model('gemini-2.5-flash'), + prompt: 'What are the latest developments in AI?', + config: { + googleSearchRetrieval: { + disableAttribution: true, + } + } + }); + + // Private data 
grounding + await ai.generate({ + model: vertexAI.model('gemini-2.5-flash'), + prompt: 'What does our company policy say about remote work?', + config: { + vertexRetrieval: { + datastore: { + projectId: 'your-cloud-project', + location: 'us-central1', + collection: 'your-collection', + }, + disableAttribution: true, + } + } + }); + ``` + + :::caution[Pricing] + Vertex AI charges additional fees for grounding requests. See [Vertex AI pricing](https://cloud.google.com/vertex-ai/generative-ai/pricing) for details. + ::: + + + Grounding functionality is available through the Vertex AI API. Implement using the Google Cloud SDK directly or through custom configuration: + + ```go + // Grounding requires custom implementation using the Vertex AI API + // See Google Cloud documentation for grounding configuration + ``` + + + Grounding functionality is available through the Vertex AI API. Implement using the Google Cloud SDK directly: + + ```python + # Grounding requires custom implementation using the Vertex AI API + # See Google Cloud documentation for grounding configuration + ``` + + + +### Image Generation with Imagen + + + + Generate images from text prompts: + + ```ts + // Basic image generation + const response = await ai.generate({ + model: vertexAI.model('imagen-3.0-generate-002'), + output: { format: 'media' }, + prompt: 'a banana riding a bicycle', + }); + + // Advanced image editing + const baseImg = fs.readFileSync('base.png', { encoding: 'base64' }); + const maskImg = fs.readFileSync('mask.png', { encoding: 'base64' }); + + const editResponse = await ai.generate({ + model: vertexAI.model('imagen-3.0-generate-002'), + output: { format: 'media' }, + prompt: [ + { media: { url: `data:image/png;base64,${baseImg}` } }, + { + media: { url: `data:image/png;base64,${maskImg}` }, + metadata: { type: 'mask' }, + }, + { text: 'replace the background with a sunset' }, + ], + config: { + editConfig: { + editMode: 'outpainting', + }, + }, + }); + ``` + + + Image generation is available through the Vertex AI API. Implement using the Google Cloud SDK: + + ```go + // Image generation requires custom implementation using the Vertex AI API + // See Vertex AI Imagen documentation for implementation details + ``` + + + Image generation is available through the Vertex AI API. 
Implement using the Google Cloud SDK: + + ```python + # Image generation requires custom implementation using the Vertex AI API + # See Vertex AI Imagen documentation for implementation details + ``` + + + +### Multimodal Embeddings + + + + Generate embeddings from text, images, and video: + + ```ts + // Text embeddings + const textEmbeddings = await ai.embed({ + embedder: vertexAI.embedder('gemini-embedding-001'), + content: 'How many widgets do you have in stock?', + }); + + // Multimodal embeddings + const multimodalEmbeddings = await ai.embed({ + embedder: vertexAI.embedder('multimodal-embedding-001'), + content: { + content: [ + { + media: { + url: 'gs://cloud-samples-data/generative-ai/video/pixel8.mp4', + contentType: 'video/mp4', + }, + }, + ], + }, + }); + ``` + + + Generate embeddings using Vertex AI models: + + ```go + // Text embeddings + embeddings, err := genkit.Embed(ctx, g, + ai.WithEmbedder("vertexai/gemini-embedding-001"), + ai.WithEmbedContent("How many widgets do you have in stock?"), + ) + if err != nil { + log.Fatal(err) + } + ``` + + + Generate embeddings using Vertex AI models: + + ```python + # Text embeddings + embeddings = await ai.embed( + embedder=vertex_ai_name('gemini-embedding-001'), + content='How many widgets do you have in stock?', + ) + ``` + + + +## Model Garden Integration + + + + Access third-party models through Vertex AI Model Garden: + + ### Claude 3 Models + + ```ts + import { vertexAIModelGarden } from '@genkit-ai/vertexai/modelgarden'; + + const ai = genkit({ + plugins: [ + vertexAIModelGarden({ + location: 'us-central1', + models: ['claude-3-haiku', 'claude-3-sonnet', 'claude-3-opus'], + }), + ], + }); + + const response = await ai.generate({ + model: 'claude-3-sonnet', + prompt: 'What should I do when I visit Melbourne?', + }); + ``` + + ### Llama 3.1 405b + + ```ts + const ai = genkit({ + plugins: [ + vertexAIModelGarden({ + location: 'us-central1', + models: ['llama3-405b-instruct-maas'], + }), + ], + }); + + const response = await ai.generate({ + model: 'llama3-405b-instruct-maas', + prompt: 'Write a function that adds two numbers together', + }); + ``` + + ### Mistral Models + + ```ts + const ai = genkit({ + plugins: [ + vertexAIModelGarden({ + location: 'us-central1', + models: ['mistral-large', 'mistral-nemo', 'codestral'], + }), + ], + }); + + const response = await ai.generate({ + model: 'mistral-large', + prompt: 'Write a function that adds two numbers together', + config: { + version: 'mistral-large-2411', + temperature: 0.7, + maxOutputTokens: 1024, + topP: 0.9, + stopSequences: ['###'], + }, + }); + ``` + + + Model Garden integration requires custom implementation using the Vertex AI API: + + ```go + // Model Garden models require custom implementation + // See Vertex AI Model Garden documentation for setup + ``` + + + Model Garden integration requires custom implementation using the Vertex AI API: + + ```python + # Model Garden models require custom implementation + # See Vertex AI Model Garden documentation for setup + ``` + + + +## Evaluation Metrics + + + + Use Vertex AI Rapid Evaluation API for model evaluation: + + ```ts + import { vertexAIEvaluation, VertexAIEvaluationMetricType } from '@genkit-ai/vertexai/evaluation'; + + const ai = genkit({ + plugins: [ + vertexAIEvaluation({ + location: 'us-central1', + metrics: [ + VertexAIEvaluationMetricType.SAFETY, + { + type: VertexAIEvaluationMetricType.ROUGE, + metricSpec: { + rougeType: 'rougeLsum', + }, + }, + ], + }), + ], + }); + ``` + + Available metrics: + - **BLEU**: 
Translation quality + - **ROUGE**: Summarization quality + - **Fluency**: Text fluency + - **Safety**: Content safety + - **Groundedness**: Factual accuracy + - **Summarization Quality/Helpfulness/Verbosity**: Summary evaluation + + Run evaluations: + ```bash + genkit eval:run + genkit eval:flow -e vertexai/safety + ``` + + + Evaluation metrics are available through the Vertex AI API: + + ```go + // Evaluation requires custom implementation using the Vertex AI API + // See Vertex AI Rapid Evaluation documentation + ``` + + + Evaluation metrics are available through the Vertex AI API: + + ```python + # Evaluation requires custom implementation using the Vertex AI API + # See Vertex AI Rapid Evaluation documentation + ``` + + + +## Vector Search + + + + Use Vertex AI Vector Search for enterprise-grade vector operations: + + ### Setup + + 1. Create a Vector Search index in the [Google Cloud Console](https://console.cloud.google.com/vertex-ai/matching-engine/indexes) + 2. Configure dimensions based on your embedding model: + - `gemini-embedding-001`: 768 dimensions + - `text-multilingual-embedding-002`: 768 dimensions + - `multimodalEmbedding001`: 128, 256, 512, or 1408 dimensions + 3. Deploy the index to a standard endpoint + + ### Configuration + + ```ts + import { vertexAIVectorSearch } from '@genkit-ai/vertexai/vectorsearch'; + import { getFirestoreDocumentIndexer, getFirestoreDocumentRetriever } from '@genkit-ai/vertexai/vectorsearch'; + + const ai = genkit({ + plugins: [ + vertexAIVectorSearch({ + projectId: 'your-project-id', + location: 'us-central1', + vectorSearchOptions: [ + { + indexId: 'your-index-id', + indexEndpointId: 'your-endpoint-id', + deployedIndexId: 'your-deployed-index-id', + publicDomainName: 'your-domain-name', + documentRetriever: firestoreDocumentRetriever, + documentIndexer: firestoreDocumentIndexer, + embedder: vertexAI.embedder('gemini-embedding-001'), + }, + ], + }), + ], + }); + ``` + + ### Usage + + ```ts + import { vertexAiIndexerRef, vertexAiRetrieverRef } from '@genkit-ai/vertexai/vectorsearch'; + + // Index documents + await ai.index({ + indexer: vertexAiIndexerRef({ + indexId: 'your-index-id', + }), + documents, + }); + + // Retrieve similar documents + const results = await ai.retrieve({ + retriever: vertexAiRetrieverRef({ + indexId: 'your-index-id', + }), + query: queryDocument, + }); + ``` + + :::caution[Pricing] + Vector Search has both ingestion and hosting costs. See [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing#vectorsearch) for details. 
+ ::: + + + Vector Search integration requires custom implementation using the Vertex AI API: + + ```go + // Vector Search requires custom implementation + // See Vertex AI Vector Search documentation for setup + ``` + + + Vector Search integration requires custom implementation using the Vertex AI API: + + ```python + # Vector Search requires custom implementation + # See Vertex AI Vector Search documentation for setup + ``` + + + +## Text-to-Speech + + + + Generate high-quality speech from text: + + ```ts + import { writeFile } from 'node:fs/promises'; + + const response = await ai.generate({ + model: vertexAI.model('gemini-2.5-flash-preview-tts'), + config: { + responseModalities: ['AUDIO'], + speechConfig: { + voiceConfig: { + prebuiltVoiceConfig: { voiceName: 'Algenib' }, + }, + }, + }, + prompt: 'Say that Genkit is an amazing Gen AI library', + }); + + if (response.media?.url) { + const audioBuffer = Buffer.from( + response.media.url.substring(response.media.url.indexOf(',') + 1), + 'base64' + ); + await writeFile('output.wav', audioBuffer); + } + ``` + + ### Multi-speaker Audio + + ```ts + const response = await ai.generate({ + model: vertexAI.model('gemini-2.5-flash-preview-tts'), + config: { + responseModalities: ['AUDIO'], + speechConfig: { + multiSpeakerVoiceConfig: { + speakerVoiceConfigs: [ + { + speaker: 'Speaker1', + voiceConfig: { + prebuiltVoiceConfig: { voiceName: 'Algenib' }, + }, + }, + { + speaker: 'Speaker2', + voiceConfig: { + prebuiltVoiceConfig: { voiceName: 'Achernar' }, + }, + }, + ], + }, + }, + }, + prompt: `Here's the dialog: + Speaker1: "Genkit is an amazing Gen AI library!" + Speaker2: "I thought it was a framework."`, + }); + ``` + + + Text-to-speech functionality requires custom implementation using the Vertex AI API: + + ```go + // TTS requires custom implementation using the Vertex AI API + // See Vertex AI Speech Generation documentation + ``` + + + Text-to-speech functionality requires custom implementation using the Vertex AI API: + + ```python + # TTS requires custom implementation using the Vertex AI API + # See Vertex AI Speech Generation documentation + ``` + + + +## Context Caching + + + + Optimize performance with context caching for large inputs: + + ```ts + const llmResponse = await ai.generate({ + messages: [ + { + role: 'user', + content: [{ text: 'Here is the relevant text from War and Peace.' 
}], + }, + { + role: 'model', + content: [ + { + text: "Based on War and Peace, here is some analysis of Pierre Bezukhov's character.", + }, + ], + metadata: { + cache: { + ttlSeconds: 300, // Cache for 5 minutes + }, + }, + }, + ], + model: vertexAI.model('gemini-2.5-flash'), + prompt: "Describe Pierre's transformation throughout the novel.", + }); + ``` + + **Supported models**: `gemini-2.5-flash-001`, `gemini-2.0-pro-001` + + + Context caching requires custom implementation using the Vertex AI API: + + ```go + // Context caching requires custom implementation + // See Vertex AI Context Caching documentation + ``` + + + Context caching requires custom implementation using the Vertex AI API: + + ```python + # Context caching requires custom implementation + # See Vertex AI Context Caching documentation + ``` + + + +## Available Models + +### Text Generation +- `gemini-2.5-flash`, `gemini-2.5-flash-lite`, `gemini-1.5-pro` +- `claude-3-haiku`, `claude-3-sonnet`, `claude-3-opus` (Model Garden) +- `llama3-405b-instruct-maas` (Model Garden) +- `mistral-large`, `mistral-nemo`, `codestral` (Model Garden) + +### Embeddings +- `gemini-embedding-001` (768 dimensions) +- `text-multilingual-embedding-002` (768 dimensions) +- `multimodal-embedding-001` (128-1408 dimensions) + +### Image Generation +- `imagen-3.0-generate-002`, `imagen-2.0-generate-001` + +### Text-to-Speech +- `gemini-2.5-flash-preview-tts` + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [evaluation](/unified-docs/evaluation) to leverage Vertex AI's evaluation metrics +- See [RAG](/unified-docs/rag) to implement retrieval-augmented generation with Vector Search +- Check out [creating flows](/unified-docs/creating-flows) to build structured AI workflows diff --git a/src/content/docs/unified-docs/plugins/xai.mdx b/src/content/docs/unified-docs/plugins/xai.mdx new file mode 100644 index 00000000..40da4136 --- /dev/null +++ b/src/content/docs/unified-docs/plugins/xai.mdx @@ -0,0 +1,895 @@ +--- +title: xAI (Grok) Plugin +description: Learn how to use xAI's Grok models with Genkit across JavaScript, Go, and Python, including text generation, image generation, and advanced configuration. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +The xAI plugin provides access to xAI's powerful Grok family of models, including advanced text generation and image generation capabilities. Grok models are known for their real-time information access and conversational abilities. + +## Installation and Setup + + + + Install the xAI plugin: + + ```bash + npm install @genkit-ai/compat-oai + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { xAI } from '@genkit-ai/compat-oai/xai'; + + const ai = genkit({ + plugins: [xAI()], + }); + ``` + + ### API Key Configuration + + Set your xAI API key using one of these methods: + + ```bash + # Environment variable (recommended) + export XAI_API_KEY=your_xai_api_key + ``` + + ```ts + // Or pass directly to plugin (not recommended for production) + const ai = genkit({ + plugins: [xAI({ apiKey: 'your_xai_api_key' })], + }); + ``` + + Get your API key from [xAI Console](https://console.x.ai/). 
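    To confirm the key is picked up, you can run a minimal generation call. A quick sketch using the same model shown in the Basic Usage section below:

    ```ts
    import { genkit } from 'genkit';
    import { xAI } from '@genkit-ai/compat-oai/xai';

    const ai = genkit({ plugins: [xAI()] });

    // Expects XAI_API_KEY to be set in your environment
    const response = await ai.generate({
      model: xAI.model('grok-3-mini'),
      prompt: 'Reply with one short sentence confirming you are reachable.',
    });
    console.log(response.text);
    ```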
+ + + For Go applications, use the OpenAI-compatible client with xAI endpoints: + + ```go + package main + + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/openai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins(&openai.OpenAI{ + APIKey: os.Getenv("XAI_API_KEY"), + BaseURL: "https://api.x.ai/v1", + Models: []openai.ModelConfig{ + {Name: "grok-3-mini", Type: "chat"}, + {Name: "grok-3", Type: "chat"}, + {Name: "grok-image", Type: "generate"}, + }, + }), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + ### Environment Configuration + + ```bash + export XAI_API_KEY=your_xai_api_key + ``` + + + For Python applications, use the OpenAI-compatible client: + + ```bash + pip install genkit-plugin-openai + ``` + + ```python + from genkit.ai import Genkit + from genkit.plugins.openai import OpenAI + + ai = Genkit( + plugins=[OpenAI( + api_key=os.getenv("XAI_API_KEY"), + base_url="https://api.x.ai/v1", + models=[ + {"name": "grok-3-mini", "type": "chat"}, + {"name": "grok-3", "type": "chat"}, + {"name": "grok-image", "type": "generate"}, + ], + )], + ) + ``` + + ### Environment Configuration + + ```bash + export XAI_API_KEY=your_xai_api_key + ``` + + + +## Basic Usage + +### Text Generation + + + + Use Grok models for text generation: + + ```ts + import { genkit, z } from 'genkit'; + import { xAI } from '@genkit-ai/compat-oai/xai'; + + const ai = genkit({ + plugins: [xAI()], + }); + + // Basic text generation + const response = await ai.generate({ + model: xAI.model('grok-3-mini'), + prompt: 'Explain quantum computing in simple terms', + }); + + console.log(response.text); + + // Flow with Grok + export const grokFlow = ai.defineFlow( + { + name: 'grokFlow', + inputSchema: z.object({ subject: z.string() }), + outputSchema: z.object({ fact: z.string() }), + }, + async ({ subject }) => { + const llmResponse = await ai.generate({ + model: xAI.model('grok-3-mini'), + prompt: `Tell me a fun fact about ${subject}`, + }); + return { fact: llmResponse.text }; + }, + ); + + // Real-time information queries + const newsResponse = await ai.generate({ + model: xAI.model('grok-3'), + prompt: 'What are the latest developments in AI this week?', + config: { + temperature: 0.7, + maxTokens: 500, + }, + }); + ``` + + + Use Grok models with the generation API: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func main() { + ctx := context.Background() + + // Basic text generation + resp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3-mini"), + ai.WithPrompt("Explain quantum computing in simple terms"), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(resp.Text()) + + // Real-time information queries + newsResp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithPrompt("What are the latest developments in AI this week?"), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.7, + "max_tokens": 500, + }), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Println(newsResp.Text()) + } + ``` + + + Use Grok models with the generation API: + + ```python + from genkit.ai import Genkit + from genkit.plugins.openai import OpenAI, openai_name + + ai = Genkit( + plugins=[OpenAI( + api_key=os.getenv("XAI_API_KEY"), + base_url="https://api.x.ai/v1", + models=[ + {"name": "grok-3-mini", "type": "chat"}, + {"name": "grok-3", "type": "chat"}, + ], + )], + ) + + # Basic text generation + response = await 
ai.generate( + model=openai_name('grok-3-mini'), + prompt='Explain quantum computing in simple terms' + ) + print(response.text) + + # Real-time information queries + news_response = await ai.generate( + model=openai_name('grok-3'), + prompt='What are the latest developments in AI this week?', + config={ + 'temperature': 0.7, + 'max_tokens': 500, + } + ) + print(news_response.text) + ``` + + + +### Image Generation + + + + Use Grok for image generation: + + ```ts + // Image generation with Grok + const imageResponse = await ai.generate({ + model: xAI.model('grok-image'), + prompt: 'A futuristic cityscape with flying cars and neon lights', + config: { + size: '1024x1024', + quality: 'hd', + style: 'vivid', + }, + }); + + // Image generation flow + export const imageFlow = ai.defineFlow( + { + name: 'imageFlow', + inputSchema: z.object({ + description: z.string(), + style: z.string().optional(), + }), + outputSchema: z.object({ imageUrl: z.string() }), + }, + async ({ description, style }) => { + const prompt = style + ? `${description} in ${style} style` + : description; + + const response = await ai.generate({ + model: xAI.model('grok-image'), + prompt, + config: { + size: '1024x1024', + quality: 'hd', + }, + }); + + return { imageUrl: response.media?.url || '' }; + }, + ); + ``` + + + Use Grok for image generation: + + ```go + // Image generation + imageResp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-image"), + ai.WithPrompt("A futuristic cityscape with flying cars and neon lights"), + ai.WithConfig(map[string]interface{}{ + "size": "1024x1024", + "quality": "hd", + "style": "vivid", + }), + ) + if err != nil { + log.Fatal(err) + } + + // Access generated image + if imageResp.Media() != nil { + fmt.Printf("Generated image URL: %s\n", imageResp.Media().URL) + } + ``` + + + Use Grok for image generation: + + ```python + # Image generation + image_response = await ai.generate( + model=openai_name('grok-image'), + prompt='A futuristic cityscape with flying cars and neon lights', + config={ + 'size': '1024x1024', + 'quality': 'hd', + 'style': 'vivid', + } + ) + + # Access generated image + if image_response.media: + print(f"Generated image URL: {image_response.media.url}") + ``` + + + +## Advanced Features + +### Real-time Information Access + + + + Leverage Grok's real-time information capabilities: + + ```ts + // Current events and news + const newsFlow = ai.defineFlow( + { + name: 'newsFlow', + inputSchema: z.object({ topic: z.string() }), + outputSchema: z.object({ summary: z.string() }), + }, + async ({ topic }) => { + const response = await ai.generate({ + model: xAI.model('grok-3'), + prompt: `Provide a current summary of recent news about ${topic}. Include the latest developments and key information.`, + config: { + temperature: 0.3, // Lower temperature for factual content + maxTokens: 800, + }, + }); + return { summary: response.text }; + }, + ); + + // Market data and trends + const marketFlow = ai.defineFlow( + { + name: 'marketFlow', + inputSchema: z.object({ symbol: z.string() }), + outputSchema: z.object({ analysis: z.string() }), + }, + async ({ symbol }) => { + const response = await ai.generate({ + model: xAI.model('grok-3'), + prompt: `Analyze the current market situation for ${symbol}. 
Include recent price movements, news, and relevant factors.`, + config: { + temperature: 0.4, + maxTokens: 600, + }, + }); + return { analysis: response.text }; + }, + ); + ``` + + + Leverage Grok's real-time information capabilities: + + ```go + // Current events and news + func getNewsAnalysis(ctx context.Context, topic string) (string, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithPrompt(fmt.Sprintf( + "Provide a current summary of recent news about %s. Include the latest developments and key information.", + topic, + )), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.3, + "max_tokens": 800, + }), + ) + if err != nil { + return "", err + } + return resp.Text(), nil + } + + // Market data and trends + func getMarketAnalysis(ctx context.Context, symbol string) (string, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithPrompt(fmt.Sprintf( + "Analyze the current market situation for %s. Include recent price movements, news, and relevant factors.", + symbol, + )), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.4, + "max_tokens": 600, + }), + ) + if err != nil { + return "", err + } + return resp.Text(), nil + } + ``` + + + Leverage Grok's real-time information capabilities: + + ```python + # Current events and news + async def get_news_analysis(topic: str) -> str: + response = await ai.generate( + model=openai_name('grok-3'), + prompt=f"Provide a current summary of recent news about {topic}. Include the latest developments and key information.", + config={ + 'temperature': 0.3, + 'max_tokens': 800, + } + ) + return response.text + + # Market data and trends + async def get_market_analysis(symbol: str) -> str: + response = await ai.generate( + model=openai_name('grok-3'), + prompt=f"Analyze the current market situation for {symbol}. Include recent price movements, news, and relevant factors.", + config={ + 'temperature': 0.4, + 'max_tokens': 600, + } + ) + return response.text + ``` + + + +### Conversational AI + + + + Build conversational applications with Grok: + + ```ts + // Conversational chat flow + export const chatFlow = ai.defineFlow( + { + name: 'chatFlow', + inputSchema: z.object({ + message: z.string(), + history: z.array(z.object({ + role: z.enum(['user', 'assistant']), + content: z.string(), + })).optional(), + }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ message, history = [] }) => { + // Build conversation context + const messages = [ + { role: 'system', content: 'You are Grok, a helpful and witty AI assistant with access to real-time information.' 
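+          // Assumption: this OpenAI-style shape ({ role, content: string }) is what the
+          // compat-oai plugin accepts here; Genkit's native messages wrap content in parts.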
}, + ...history, + { role: 'user', content: message }, + ]; + + const response = await ai.generate({ + model: xAI.model('grok-3'), + messages, + config: { + temperature: 0.8, + maxTokens: 1000, + }, + }); + + return { response: response.text }; + }, + ); + + // Personality-driven responses + export const personalityFlow = ai.defineFlow( + { + name: 'personalityFlow', + inputSchema: z.object({ + query: z.string(), + personality: z.enum(['witty', 'professional', 'casual', 'technical']), + }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ query, personality }) => { + const personalityPrompts = { + witty: 'Respond with humor and wit, making clever observations.', + professional: 'Respond in a professional, formal tone.', + casual: 'Respond in a casual, friendly manner.', + technical: 'Respond with technical depth and precision.', + }; + + const response = await ai.generate({ + model: xAI.model('grok-3-mini'), + prompt: `${personalityPrompts[personality]} Query: ${query}`, + config: { + temperature: personality === 'witty' ? 0.9 : 0.6, + maxTokens: 600, + }, + }); + + return { response: response.text }; + }, + ); + ``` + + + Build conversational applications with Grok: + + ```go + type ChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` + } + + func handleChat(ctx context.Context, message string, history []ChatMessage) (string, error) { + // Build conversation context + messages := []ChatMessage{ + {Role: "system", Content: "You are Grok, a helpful and witty AI assistant with access to real-time information."}, + } + messages = append(messages, history...) + messages = append(messages, ChatMessage{Role: "user", Content: message}) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithMessages(messages), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.8, + "max_tokens": 1000, + }), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + ``` + + + Build conversational applications with Grok: + + ```python + from typing import List, Dict + + async def handle_chat(message: str, history: List[Dict[str, str]] = None) -> str: + if history is None: + history = [] + + # Build conversation context + messages = [ + {"role": "system", "content": "You are Grok, a helpful and witty AI assistant with access to real-time information."}, + *history, + {"role": "user", "content": message}, + ] + + response = await ai.generate( + model=openai_name('grok-3'), + messages=messages, + config={ + 'temperature': 0.8, + 'max_tokens': 1000, + } + ) + + return response.text + ``` + + + +## Model Comparison + +### Available Models + +| Model | Capabilities | Best For | Context Window | +|-------|-------------|----------|----------------| +| **grok-3-mini** | Fast text generation | Quick responses, simple tasks | 128K tokens | +| **grok-3** | Advanced reasoning, real-time data | Complex analysis, current events | 128K tokens | +| **grok-image** | Image generation | Creative visuals, concept art | N/A | + +### Performance Characteristics + + + + ```ts + // Performance comparison example + const performanceTest = async () => { + const prompt = "Explain the impact of AI on modern society"; + + // Fast response with grok-3-mini + const startMini = Date.now(); + const miniResponse = await ai.generate({ + model: xAI.model('grok-3-mini'), + prompt, + }); + const miniTime = Date.now() - startMini; + + // Detailed response with grok-3 + const startFull = Date.now(); + const fullResponse = await ai.generate({ + model: 
xAI.model('grok-3'), + prompt, + }); + const fullTime = Date.now() - startFull; + + console.log(`Mini: ${miniTime}ms, Full: ${fullTime}ms`); + console.log(`Mini length: ${miniResponse.text.length}, Full length: ${fullResponse.text.length}`); + }; + ``` + + + ```go + func performanceTest(ctx context.Context) { + prompt := "Explain the impact of AI on modern society" + + // Fast response with grok-3-mini + startMini := time.Now() + miniResp, _ := genkit.Generate(ctx, g, + ai.WithModel("grok-3-mini"), + ai.WithPrompt(prompt), + ) + miniTime := time.Since(startMini) + + // Detailed response with grok-3 + startFull := time.Now() + fullResp, _ := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithPrompt(prompt), + ) + fullTime := time.Since(startFull) + + fmt.Printf("Mini: %v, Full: %v\n", miniTime, fullTime) + fmt.Printf("Mini length: %d, Full length: %d\n", + len(miniResp.Text()), len(fullResp.Text())) + } + ``` + + + ```python + import time + + async def performance_test(): + prompt = "Explain the impact of AI on modern society" + + # Fast response with grok-3-mini + start_mini = time.time() + mini_response = await ai.generate( + model=openai_name('grok-3-mini'), + prompt=prompt + ) + mini_time = time.time() - start_mini + + # Detailed response with grok-3 + start_full = time.time() + full_response = await ai.generate( + model=openai_name('grok-3'), + prompt=prompt + ) + full_time = time.time() - start_full + + print(f"Mini: {mini_time:.2f}s, Full: {full_time:.2f}s") + print(f"Mini length: {len(mini_response.text)}, Full length: {len(full_response.text)}") + ``` + + + +## Advanced Configuration + +### Custom Model Configuration + + + + ```ts + // Advanced configuration with passthrough options + const response = await ai.generate({ + model: xAI.model('grok-3'), + prompt: 'Analyze the latest tech trends', + config: { + temperature: 0.7, + maxTokens: 1000, + topP: 0.9, + frequencyPenalty: 0.1, + presencePenalty: 0.1, + // Passthrough configuration for new features + stream: true, + logprobs: true, + top_logprobs: 5, + }, + }); + + // Environment-specific configuration + const environmentConfig = { + development: { + model: xAI.model('grok-3-mini'), + temperature: 0.8, + maxTokens: 500, + }, + production: { + model: xAI.model('grok-3'), + temperature: 0.6, + maxTokens: 1000, + }, + }; + + const config = environmentConfig[process.env.NODE_ENV || 'development']; + ``` + + + ```go + // Advanced configuration + resp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithPrompt("Analyze the latest tech trends"), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.7, + "max_tokens": 1000, + "top_p": 0.9, + "frequency_penalty": 0.1, + "presence_penalty": 0.1, + "stream": true, + "logprobs": true, + "top_logprobs": 5, + }), + ) + ``` + + + ```python + # Advanced configuration + response = await ai.generate( + model=openai_name('grok-3'), + prompt='Analyze the latest tech trends', + config={ + 'temperature': 0.7, + 'max_tokens': 1000, + 'top_p': 0.9, + 'frequency_penalty': 0.1, + 'presence_penalty': 0.1, + 'stream': True, + 'logprobs': True, + 'top_logprobs': 5, + } + ) + ``` + + + +## Best Practices + +### Optimizing for Real-time Information + +1. **Use appropriate models**: Use `grok-3` for current events and real-time data +2. **Set proper temperature**: Lower temperature (0.3-0.5) for factual content +3. **Specify time context**: Include "current", "latest", or "recent" in prompts +4. 
**Verify information**: Cross-reference important facts when possible + +### Cost Optimization + +1. **Choose the right model**: Use `grok-3-mini` for simple tasks +2. **Optimize token usage**: Be concise in prompts and set appropriate `maxTokens` +3. **Cache responses**: Cache frequently requested information +4. **Batch requests**: Group similar requests when possible + +### Error Handling + + + + ```ts + const robustGrokFlow = ai.defineFlow( + { + name: 'robustGrokFlow', + inputSchema: z.object({ query: z.string() }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ query }) => { + try { + const response = await ai.generate({ + model: xAI.model('grok-3'), + prompt: query, + config: { + temperature: 0.7, + maxTokens: 800, + }, + }); + return { response: response.text }; + } catch (error) { + if (error.message.includes('rate_limit')) { + // Fallback to mini model + const fallbackResponse = await ai.generate({ + model: xAI.model('grok-3-mini'), + prompt: query, + }); + return { response: fallbackResponse.text }; + } + throw error; + } + }, + ); + ``` + + + ```go + func robustGrokGenerate(ctx context.Context, query string) (string, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithModel("grok-3"), + ai.WithPrompt(query), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.7, + "max_tokens": 800, + }), + ) + + if err != nil { + if strings.Contains(err.Error(), "rate_limit") { + // Fallback to mini model + fallbackResp, fallbackErr := genkit.Generate(ctx, g, + ai.WithModel("grok-3-mini"), + ai.WithPrompt(query), + ) + if fallbackErr != nil { + return "", fallbackErr + } + return fallbackResp.Text(), nil + } + return "", err + } + + return resp.Text(), nil + } + ``` + + + ```python + async def robust_grok_generate(query: str) -> str: + try: + response = await ai.generate( + model=openai_name('grok-3'), + prompt=query, + config={ + 'temperature': 0.7, + 'max_tokens': 800, + } + ) + return response.text + except Exception as error: + if 'rate_limit' in str(error): + # Fallback to mini model + fallback_response = await ai.generate( + model=openai_name('grok-3-mini'), + prompt=query + ) + return fallback_response.text + raise error + ``` + + + +## Next Steps + +- Learn about [generating content](/unified-docs/generating-content) to understand how to use these models effectively +- Explore [tool calling](/unified-docs/tool-calling) to add interactive capabilities to your Grok applications +- See [creating flows](/unified-docs/creating-flows) to build structured AI workflows with real-time information +- Check out [deployment guides](/unified-docs/deployment) for production deployment strategies diff --git a/src/content/docs/unified-docs/vector-databases/astra-db.mdx b/src/content/docs/unified-docs/vector-databases/astra-db.mdx new file mode 100644 index 00000000..b2a3dfb2 --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/astra-db.mdx @@ -0,0 +1,726 @@ +--- +title: Astra DB Vector Database +description: Learn how to use DataStax Astra DB with Genkit across JavaScript, Go, and Python for serverless vector storage, semantic search, and RAG applications. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +DataStax Astra DB is a serverless vector database built on Apache Cassandra. It provides scalable vector storage with built-in embedding generation capabilities through Astra DB Vectorize, making it ideal for production AI applications that need reliable, distributed vector search. 
+ +## Installation and Setup + + + + Install the Astra DB plugin: + + ```bash + npm install genkitx-astra-db + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { astraDB } from 'genkitx-astra-db'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [ + googleAI(), + astraDB([ + { + clientParams: { + applicationToken: process.env.ASTRA_DB_APPLICATION_TOKEN, + apiEndpoint: process.env.ASTRA_DB_API_ENDPOINT, + keyspace: 'default_keyspace', + }, + collectionName: 'documents', + embedder: googleAI.embedder('gemini-embedding-001'), + }, + ]), + ], + }); + ``` + + ### Prerequisites + + 1. **DataStax Account**: [Sign up for a free DataStax account](https://astra.datastax.com/signup) + 2. **Astra DB Database**: Create a Serverless Vector database + 3. **Collection**: Create a collection with dimensions matching your embedding model + 4. **Credentials**: Get your Application Token and API Endpoint + + ### Environment Variables + + ```bash + export ASTRA_DB_APPLICATION_TOKEN=your_application_token + export ASTRA_DB_API_ENDPOINT=your_astra_db_endpoint + ``` + + ### Using Astra DB Vectorize + + You can use Astra DB's built-in embedding generation: + + ```ts + const ai = genkit({ + plugins: [ + astraDB([ + { + clientParams: { + applicationToken: process.env.ASTRA_DB_APPLICATION_TOKEN, + apiEndpoint: process.env.ASTRA_DB_API_ENDPOINT, + keyspace: 'default_keyspace', + }, + collectionName: 'documents', + // No embedder needed - Astra DB Vectorize handles embedding generation + }, + ]), + ], + }); + ``` + + + For Go applications, you can use Astra DB through the DataStax Go driver: + + ```bash + go get github.com/datastax/astra-db-go + ``` + + ```go + package main + + import ( + "context" + "os" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/astradb" + "github.com/firebase/genkit/go/plugins/googleai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googleai.GoogleAI{}, + &astradb.AstraDB{ + ApplicationToken: os.Getenv("ASTRA_DB_APPLICATION_TOKEN"), + APIEndpoint: os.Getenv("ASTRA_DB_API_ENDPOINT"), + Keyspace: "default_keyspace", + Collections: []astradb.CollectionConfig{ + { + Name: "documents", + Embedder: "googleai/gemini-embedding-001", + }, + }, + }, + ), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + For Python applications, install the Astra DB client: + + ```bash + pip install astrapy genkit-plugin-astradb + ``` + + ```python + import os + from genkit.ai import Genkit + from genkit.plugins.astradb import AstraDB + from genkit.plugins.google_genai import GoogleGenAI + + ai = Genkit( + plugins=[ + GoogleGenAI(), + AstraDB( + application_token=os.getenv("ASTRA_DB_APPLICATION_TOKEN"), + api_endpoint=os.getenv("ASTRA_DB_API_ENDPOINT"), + keyspace="default_keyspace", + collections=[ + { + "name": "documents", + "embedder": "googleai/gemini-embedding-001", + } + ], + ), + ], + ) + ``` + + + +## Basic Usage + +### Document Indexing + + + + Index documents for semantic search: + + ```ts + import { astraDBIndexerRef } from 'genkitx-astra-db'; + import { Document } from 'genkit'; + + // Create indexer reference + const documentsIndexer = astraDBIndexerRef({ + collectionName: 'documents', + }); + + // Prepare documents for indexing + const documents: Document[] = [ + { + content: 'Astra DB is a serverless vector database built on Apache Cassandra.', + metadata: { + title: 'Astra DB Overview', + category: 'database', 
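+          // Metadata fields are schemaless in Astra DB; the retrieval examples
+          // below filter on category, source, and score.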
+ source: 'documentation', + score: 95, + }, + }, + { + content: 'Serverless databases provide automatic scaling and management.', + metadata: { + title: 'Serverless Architecture', + category: 'technology', + source: 'blog', + score: 88, + }, + }, + ]; + + // Index documents + await ai.index({ + indexer: documentsIndexer, + documents, + }); + + // Batch indexing for large datasets + const batchSize = 100; + for (let i = 0; i < largeDocumentSet.length; i += batchSize) { + const batch = largeDocumentSet.slice(i, i + batchSize); + await ai.index({ + indexer: documentsIndexer, + documents: batch, + }); + } + ``` + + + Index documents for semantic search: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func indexDocuments(ctx context.Context) error { + documents := []ai.Document{ + { + Content: "Astra DB is a serverless vector database built on Apache Cassandra.", + Metadata: map[string]interface{}{ + "title": "Astra DB Overview", + "category": "database", + "source": "documentation", + "score": 95, + }, + }, + { + Content: "Serverless databases provide automatic scaling and management.", + Metadata: map[string]interface{}{ + "title": "Serverless Architecture", + "category": "technology", + "source": "blog", + "score": 88, + }, + }, + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer("astradb/documents"), + ai.WithDocuments(documents), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + + // Batch indexing function + func batchIndexDocuments(ctx context.Context, documents []ai.Document, batchSize int) error { + for i := 0; i < len(documents); i += batchSize { + end := i + batchSize + if end > len(documents) { + end = len(documents) + } + + batch := documents[i:end] + err := genkit.Index(ctx, g, + ai.WithIndexer("astradb/documents"), + ai.WithDocuments(batch), + ) + if err != nil { + return fmt.Errorf("failed to index batch: %w", err) + } + } + return nil + } + ``` + + + Index documents for semantic search: + + ```python + from typing import List, Dict, Any + + # Prepare documents for indexing + documents = [ + { + "content": "Astra DB is a serverless vector database built on Apache Cassandra.", + "metadata": { + "title": "Astra DB Overview", + "category": "database", + "source": "documentation", + "score": 95, + }, + }, + { + "content": "Serverless databases provide automatic scaling and management.", + "metadata": { + "title": "Serverless Architecture", + "category": "technology", + "source": "blog", + "score": 88, + }, + }, + ] + + # Index documents + async def index_documents(docs: List[Dict[str, Any]], collection_name: str = "documents"): + try: + indexer = f"astradb/{collection_name}" + + await ai.index( + indexer=indexer, + documents=docs + ) + + return {"indexed": len(docs), "success": True} + except Exception as error: + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + + # Batch indexing for large datasets + async def batch_index_documents( + docs: List[Dict[str, Any]], + collection_name: str = "documents", + batch_size: int = 100 + ): + total_indexed = 0 + + for i in range(0, len(docs), batch_size): + batch = docs[i:i + batch_size] + + try: + await ai.index( + indexer=f"astradb/{collection_name}", + documents=batch + ) + total_indexed += len(batch) + except Exception as error: + print(f"Batch indexing failed: {error}") + break + + return {"indexed": total_indexed, "success": total_indexed == len(docs)} + ``` + + 
+ +### Document Retrieval + + + + Retrieve relevant documents using semantic search: + + ```ts + import { astraDBRetrieverRef } from 'genkitx-astra-db'; + + // Create retriever reference + const documentsRetriever = astraDBRetrieverRef({ + collectionName: 'documents', + }); + + // Basic retrieval + const query = "What is a serverless database?"; + const docs = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { + k: 5, // Number of documents to retrieve + }, + }); + + console.log('Retrieved documents:', docs); + + // Advanced retrieval with filtering + type DocumentSchema = { + _id: string; + text: string; + score: number; + category: string; + }; + + const typedRetriever = astraDBRetrieverRef({ + collectionName: 'documents', + }); + + const filteredDocs = await ai.retrieve({ + retriever: typedRetriever, + query, + options: { + k: 3, + filter: { + score: { $gt: 90 }, // Only documents with score > 90 + category: 'database', // Only database-related documents + }, + }, + }); + ``` + + + Retrieve relevant documents using semantic search: + + ```go + // Basic retrieval + func retrieveDocuments(ctx context.Context, query string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("astradb/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 5, + }), + ) + if err != nil { + return nil, fmt.Errorf("retrieval failed: %w", err) + } + + return docs, nil + } + + // Advanced retrieval with filtering + func advancedRetrieve(ctx context.Context, query string, limit int, filter map[string]interface{}) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("astradb/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": limit, + "filter": filter, + }), + ) + if err != nil { + return nil, fmt.Errorf("advanced retrieval failed: %w", err) + } + + return docs, nil + } + + // Usage example with filtering + func searchHighQualityDocuments(ctx context.Context, query string) ([]ai.Document, error) { + filter := map[string]interface{}{ + "score": map[string]interface{}{ + "$gt": 90, + }, + "category": "database", + } + + return advancedRetrieve(ctx, query, 3, filter) + } + ``` + + + Retrieve relevant documents using semantic search: + + ```python + from typing import List, Dict, Any, Optional + + # Basic retrieval + async def retrieve_documents(query: str, collection_name: str = "documents", k: int = 5) -> List[Dict[str, Any]]: + try: + retriever = f"astradb/{collection_name}" + docs = await ai.retrieve( + retriever=retriever, + query=query, + options={"k": k} + ) + return docs + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + + # Advanced retrieval with filtering + async def advanced_retrieve( + query: str, + collection_name: str = "documents", + k: int = 5, + filter_criteria: Optional[Dict[str, Any]] = None + ) -> List[Dict[str, Any]]: + try: + retriever = f"astradb/{collection_name}" + + options = {"k": k} + if filter_criteria: + options["filter"] = filter_criteria + + docs = await ai.retrieve( + retriever=retriever, + query=query, + options=options + ) + + return docs + except Exception as error: + print(f"Advanced retrieval failed: {error}") + return [] + + # Usage examples + async def search_high_quality_documents(query: str) -> List[Dict[str, Any]]: + # Search for high-quality database documents + filter_criteria = { + "score": {"$gt": 90}, + "category": "database" + } + + return await advanced_retrieve( + query=query, + k=3, + 
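+            # k=3 keeps results high-precision; raise k for broader recall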
filter_criteria=filter_criteria + ) + ``` + + + +## Advanced Features + +### Hybrid Search with Filtering + + + + Combine vector similarity with metadata filtering: + + ```ts + // Complex filtering with multiple conditions + const complexRetriever = astraDBRetrieverRef({ + collectionName: 'documents', + }); + + const complexSearch = await ai.retrieve({ + retriever: complexRetriever, + query: "database performance optimization", + options: { + k: 10, + filter: { + $and: [ + { score: { $gte: 85 } }, + { category: { $in: ['database', 'performance'] } }, + { source: { $ne: 'deprecated' } }, + ], + }, + }, + }); + + // Range-based filtering + const recentDocuments = await ai.retrieve({ + retriever: complexRetriever, + query: "latest database features", + options: { + k: 5, + filter: { + score: { $gte: 80, $lte: 100 }, + category: 'database', + }, + }, + }); + + // Text-based filtering + const specificSource = await ai.retrieve({ + retriever: complexRetriever, + query: "vector search capabilities", + options: { + k: 3, + filter: { + source: { $regex: 'official.*docs' }, + category: 'database', + }, + }, + }); + ``` + + + Combine vector similarity with metadata filtering: + + ```go + // Complex filtering function + func performComplexSearch(ctx context.Context, query string) ([]ai.Document, error) { + // Multiple condition filtering + complexFilter := map[string]interface{}{ + "$and": []map[string]interface{}{ + {"score": map[string]interface{}{"$gte": 85}}, + {"category": map[string]interface{}{"$in": []string{"database", "performance"}}}, + {"source": map[string]interface{}{"$ne": "deprecated"}}, + }, + } + + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("astradb/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 10, + "filter": complexFilter, + }), + ) + if err != nil { + return nil, fmt.Errorf("complex search failed: %w", err) + } + + return docs, nil + } + + // Range-based filtering + func searchByScoreRange(ctx context.Context, query string, minScore, maxScore int) ([]ai.Document, error) { + filter := map[string]interface{}{ + "score": map[string]interface{}{ + "$gte": minScore, + "$lte": maxScore, + }, + "category": "database", + } + + return advancedRetrieve(ctx, query, 5, filter) + } + + // Text pattern filtering + func searchBySourcePattern(ctx context.Context, query, pattern string) ([]ai.Document, error) { + filter := map[string]interface{}{ + "source": map[string]interface{}{ + "$regex": pattern, + }, + "category": "database", + } + + return advancedRetrieve(ctx, query, 3, filter) + } + ``` + + + Combine vector similarity with metadata filtering: + + ```python + # Complex filtering with multiple conditions + async def perform_complex_search(query: str) -> List[Dict[str, Any]]: + complex_filter = { + "$and": [ + {"score": {"$gte": 85}}, + {"category": {"$in": ["database", "performance"]}}, + {"source": {"$ne": "deprecated"}}, + ] + } + + return await advanced_retrieve( + query=query, + k=10, + filter_criteria=complex_filter + ) + + # Range-based filtering + async def search_by_score_range( + query: str, + min_score: int = 80, + max_score: int = 100 + ) -> List[Dict[str, Any]]: + filter_criteria = { + "score": {"$gte": min_score, "$lte": max_score}, + "category": "database" + } + + return await advanced_retrieve( + query=query, + k=5, + filter_criteria=filter_criteria + ) + + # Text pattern filtering + async def search_by_source_pattern(query: str, pattern: str) -> List[Dict[str, Any]]: + filter_criteria = { + "source": {"$regex": pattern}, + 
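+            # pairing the pattern match with an exact-match key narrows the vector search
+            # space (assumes Data API support for $regex-style operators, as used above)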
"category": "database" + } + + return await advanced_retrieve( + query=query, + k=3, + filter_criteria=filter_criteria + ) + + # Comprehensive search example + async def comprehensive_search_example(): + # Search for high-quality recent database documentation + results = await perform_complex_search("vector database optimization") + + # Search within score range + range_results = await search_by_score_range("serverless architecture", 85, 95) + + # Search official documentation + official_docs = await search_by_source_pattern("database features", "official.*docs") + + return { + "complex_search": results, + "score_range": range_results, + "official_docs": official_docs + } + ``` + + + +## Best Practices + +### Database Configuration + +1. **Collection Design**: Choose appropriate dimensions for your embedding model +2. **Keyspace Organization**: Use keyspaces to organize different data types +3. **Indexing Strategy**: Leverage Astra DB's automatic indexing capabilities +4. **Schema Design**: Structure metadata for effective filtering + +### Performance Optimization + +1. **Batch Operations**: Index documents in batches for better throughput +2. **Connection Pooling**: Reuse connections for multiple operations +3. **Filtering Strategy**: Use metadata filters to reduce search space +4. **Embedding Strategy**: Consider using Astra DB Vectorize for built-in embedding generation + +### Production Deployment + +1. **Security**: Use secure application tokens and rotate them regularly +2. **Monitoring**: Monitor query performance and database metrics +3. **Scaling**: Leverage Astra DB's automatic scaling capabilities +4. **Backup**: Implement backup strategies for critical data + +### Cost Optimization + +1. **Efficient Queries**: Use appropriate k values and filters +2. **Data Lifecycle**: Archive or delete old documents when appropriate +3. **Resource Monitoring**: Monitor usage to optimize costs +4. **Vectorize Usage**: Consider Astra DB Vectorize to reduce external embedding costs + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with vector search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/content/docs/unified-docs/vector-databases/chromadb.mdx b/src/content/docs/unified-docs/vector-databases/chromadb.mdx new file mode 100644 index 00000000..fb906912 --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/chromadb.mdx @@ -0,0 +1,571 @@ +--- +title: ChromaDB Vector Database +description: Learn how to use ChromaDB with Genkit across JavaScript, Go, and Python for vector storage, semantic search, and RAG applications. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +ChromaDB is an open-source vector database designed for AI applications. It provides efficient vector storage, similarity search, and metadata filtering capabilities. ChromaDB can run in-memory, as a standalone server, or in client/server mode, making it flexible for both development and production use. 
+ +## Installation and Setup + + + + Install the ChromaDB plugin: + + ```bash + npm install genkitx-chromadb + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { chroma } from 'genkitx-chromadb'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [ + chroma([ + { + collectionName: 'my-documents', + embedder: googleAI.embedder('gemini-embedding-001'), + }, + ]), + ], + }); + ``` + + ### Configuration Options + + ```ts + // Advanced configuration + const ai = genkit({ + plugins: [ + chroma([ + { + collectionName: 'my-documents', + embedder: googleAI.embedder('gemini-embedding-001'), + clientParams: { + path: 'http://localhost:8000', // Custom Chroma server + // auth: { ... }, // Authentication if needed + }, + embedderOptions: { + taskType: 'RETRIEVAL_DOCUMENT', + }, + }, + ]), + ], + }); + ``` + + + For Go applications, you can use ChromaDB through the Go client: + + ```bash + go get github.com/chroma-core/chroma/go + ``` + + ```go + package main + + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/chroma" + "github.com/firebase/genkit/go/plugins/googleai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googleai.GoogleAI{}, + &chroma.ChromaDB{ + ServerURL: "http://localhost:8000", + Collections: []chroma.CollectionConfig{ + { + Name: "my-documents", + Embedder: "googleai/gemini-embedding-001", + }, + }, + }, + ), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + For Python applications, install the ChromaDB client: + + ```bash + pip install chromadb genkit-plugin-chromadb + ``` + + ```python + from genkit.ai import Genkit + from genkit.plugins.chromadb import ChromaDB + from genkit.plugins.google_genai import GoogleGenAI + + ai = Genkit( + plugins=[ + GoogleGenAI(), + ChromaDB( + server_url="http://localhost:8000", + collections=[ + { + "name": "my-documents", + "embedder": "googleai/gemini-embedding-001", + } + ], + ), + ], + ) + ``` + + + +## Basic Usage + +### Document Indexing + + + + Index documents for semantic search: + + ```ts + import { chromaIndexerRef } from 'genkitx-chromadb'; + import { Document } from 'genkit'; + + // Prepare documents for indexing + const documents: Document[] = [ + { + content: 'ChromaDB is an open-source vector database for AI applications.', + metadata: { + title: 'ChromaDB Overview', + category: 'database', + source: 'documentation', + }, + }, + { + content: 'Vector databases enable semantic search and similarity matching.', + metadata: { + title: 'Vector Search', + category: 'technology', + source: 'blog', + }, + }, + ]; + + // Index documents using the default configured collection + await ai.index({ + indexer: chromaIndexerRef, + documents, + }); + + // Or specify a specific collection + const documentsIndexer = chromaIndexerRef({ + collectionName: 'my-documents', + }); + + await ai.index({ + indexer: documentsIndexer, + documents, + }); + ``` + + + Index documents for semantic search: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func indexDocuments(ctx context.Context) error { + documents := []ai.Document{ + { + Content: "ChromaDB is an open-source vector database for AI applications.", + Metadata: map[string]interface{}{ + "title": "ChromaDB Overview", + "category": "database", + "source": "documentation", + }, + }, + { + Content: "Vector databases enable semantic 
search and similarity matching.", + Metadata: map[string]interface{}{ + "title": "Vector Search", + "category": "technology", + "source": "blog", + }, + }, + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer("chromadb/my-documents"), + ai.WithDocuments(documents), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + ``` + + + Index documents for semantic search: + + ```python + from typing import List, Dict, Any + + # Prepare documents for indexing + documents = [ + { + "content": "ChromaDB is an open-source vector database for AI applications.", + "metadata": { + "title": "ChromaDB Overview", + "category": "database", + "source": "documentation", + }, + }, + { + "content": "Vector databases enable semantic search and similarity matching.", + "metadata": { + "title": "Vector Search", + "category": "technology", + "source": "blog", + }, + }, + ] + + # Index documents + async def index_documents(docs: List[Dict[str, Any]], collection_name: str = None): + try: + indexer = f"chromadb/{collection_name}" if collection_name else "chromadb/my-documents" + + await ai.index( + indexer=indexer, + documents=docs + ) + + return {"indexed": len(docs), "success": True} + except Exception as error: + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + ``` + + + +### Document Retrieval + + + + Retrieve relevant documents using semantic search: + + ```ts + import { chromaRetrieverRef } from 'genkitx-chromadb'; + + // Basic retrieval + const query = "What is a vector database?"; + const docs = await ai.retrieve({ + retriever: chromaRetrieverRef, + query, + }); + + console.log('Retrieved documents:', docs); + + // Advanced retrieval with specific collection and options + const documentsRetriever = chromaRetrieverRef({ + collectionName: 'my-documents', + }); + + const advancedDocs = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { + k: 5, // Number of documents to retrieve + where: { + category: 'database', // Metadata filtering + }, + }, + }); + ``` + + + Retrieve relevant documents using semantic search: + + ```go + // Basic retrieval + func retrieveDocuments(ctx context.Context, query string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("chromadb/my-documents"), + ai.WithQuery(query), + ) + if err != nil { + return nil, fmt.Errorf("retrieval failed: %w", err) + } + + return docs, nil + } + + // Advanced retrieval with options + func advancedRetrieve(ctx context.Context, query, collectionName string, limit int, filter map[string]interface{}) ([]ai.Document, error) { + retriever := fmt.Sprintf("chromadb/%s", collectionName) + + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever(retriever), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": limit, + "where": filter, + }), + ) + if err != nil { + return nil, fmt.Errorf("advanced retrieval failed: %w", err) + } + + return docs, nil + } + ``` + + + Retrieve relevant documents using semantic search: + + ```python + from typing import List, Dict, Any, Optional + + # Basic retrieval + async def retrieve_documents(query: str, collection_name: str = "my-documents") -> List[Dict[str, Any]]: + try: + retriever = f"chromadb/{collection_name}" + docs = await ai.retrieve( + retriever=retriever, + query=query + ) + return docs + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + + # Advanced retrieval with options + async def advanced_retrieve( + query: str, + 
collection_name: str = "my-documents", + limit: int = 5, + filter_criteria: Optional[Dict[str, Any]] = None + ) -> List[Dict[str, Any]]: + try: + retriever = f"chromadb/{collection_name}" + + options = {"k": limit} + if filter_criteria: + options["where"] = filter_criteria + + docs = await ai.retrieve( + retriever=retriever, + query=query, + options=options + ) + + return docs + except Exception as error: + print(f"Advanced retrieval failed: {error}") + return [] + ``` + + + +## ChromaDB Server Setup + +### Running ChromaDB Server + + + + Start a ChromaDB server for production use: + + ```bash + # Install ChromaDB server + pip install chromadb + + # Run the server + chroma run --host 0.0.0.0 --port 8000 + ``` + + Connect to the server in your application: + + ```ts + const ai = genkit({ + plugins: [ + chroma([ + { + collectionName: 'my-documents', + embedder: googleAI.embedder('gemini-embedding-001'), + clientParams: { + path: 'http://your-chroma-server:8000', + }, + }, + ]), + ], + }); + ``` + + + Connect to a ChromaDB server: + + ```go + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &chroma.ChromaDB{ + ServerURL: "http://your-chroma-server:8000", + Collections: []chroma.CollectionConfig{ + { + Name: "my-documents", + Embedder: "googleai/gemini-embedding-001", + }, + }, + }, + ), + ) + ``` + + + Connect to a ChromaDB server: + + ```python + ai = Genkit( + plugins=[ + ChromaDB( + server_url="http://your-chroma-server:8000", + collections=[ + { + "name": "my-documents", + "embedder": "googleai/gemini-embedding-001", + } + ], + ), + ], + ) + ``` + + + +## Advanced Features + +### Metadata Filtering + + + + Use metadata filtering for precise retrieval: + + ```ts + // Category-based filtering + const techDocs = await ai.retrieve({ + retriever: chromaRetrieverRef, + query: "database concepts", + options: { + k: 5, + where: { + category: 'technology', + }, + }, + }); + + // Complex filtering with multiple conditions + const complexFilter = await ai.retrieve({ + retriever: chromaRetrieverRef, + query: "AI applications", + options: { + k: 10, + where: { + $and: [ + { category: { $in: ['technology', 'database'] } }, + { source: 'documentation' }, + ], + }, + }, + }); + ``` + + + Use metadata filtering for precise retrieval: + + ```go + // Category-based filtering + func retrieveByCategory(ctx context.Context, query, category string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("chromadb/my-documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 5, + "where": map[string]interface{}{ + "category": category, + }, + }), + ) + if err != nil { + return nil, fmt.Errorf("category filtering failed: %w", err) + } + return docs, nil + } + ``` + + + Use metadata filtering for precise retrieval: + + ```python + # Category-based filtering + async def retrieve_by_category(query: str, category: str, limit: int = 5) -> List[Dict[str, Any]]: + try: + docs = await ai.retrieve( + retriever="chromadb/my-documents", + query=query, + options={ + "k": limit, + "where": {"category": category} + } + ) + return docs + except Exception as error: + print(f"Category filtering failed: {error}") + return [] + ``` + + + +## Best Practices + +### Collection Management + +1. **Use descriptive collection names**: Choose names that reflect the content type +2. **Organize by domain**: Separate collections for different data types or domains +3. **Consider collection size**: Balance between too many small collections and few large ones +4. 
**Plan for scaling**: Design collection structure for future growth + +### Performance Optimization + +1. **Batch operations**: Index documents in batches for better performance +2. **Optimize embeddings**: Choose appropriate embedding models for your use case +3. **Use metadata filtering**: Combine semantic search with metadata filters +4. **Monitor memory usage**: ChromaDB loads collections into memory + +### Production Deployment + +1. **Use persistent storage**: Configure ChromaDB with persistent storage +2. **Set up monitoring**: Monitor collection sizes and query performance +3. **Backup collections**: Implement regular backup strategies +4. **Scale horizontally**: Consider distributed deployment for large datasets + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with vector search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/content/docs/unified-docs/vector-databases/cloud-sql-postgresql.mdx b/src/content/docs/unified-docs/vector-databases/cloud-sql-postgresql.mdx new file mode 100644 index 00000000..a0e62f57 --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/cloud-sql-postgresql.mdx @@ -0,0 +1,913 @@ +--- +title: Cloud SQL for PostgreSQL Vector Database +description: Learn how to use Google Cloud SQL for PostgreSQL with pgvector extension and Genkit across JavaScript, Go, and Python for managed vector storage and semantic search. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully managed PostgreSQL database with vector search capabilities. It combines the reliability and scalability of Google Cloud with the power of PostgreSQL and pgvector, making it ideal for production AI applications that need managed vector storage with enterprise-grade features. + +## Installation and Setup + + + + Install the Cloud SQL PostgreSQL plugin: + + ```bash + npm install genkitx-cloud-sql-pg + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { postgres, PostgresEngine } from 'genkitx-cloud-sql-pg'; + import { vertexAI } from '@genkit-ai/vertexai'; + + // Create PostgresEngine instance + const engine = await PostgresEngine.fromInstance( + 'my-project', + 'us-central1', + 'my-instance', + 'my-database' + ); + + // Initialize vector store table + await engine.initVectorstoreTable('documents', 768, { + schemaName: 'public', + contentColumn: 'content', + embeddingColumn: 'embedding', + idColumn: 'id', + metadataColumns: [ + { name: 'title', dataType: 'TEXT' }, + { name: 'category', dataType: 'TEXT' }, + { name: 'source', dataType: 'TEXT' }, + ], + metadataJsonColumn: 'metadata', + storeMetadata: true, + overwriteExisting: false, + }); + + const ai = genkit({ + plugins: [ + vertexAI(), + postgres([ + { + tableName: 'documents', + engine: engine, + embedder: vertexAI.embedder('gemini-embedding-001'), + schemaName: 'public', + contentColumn: 'content', + embeddingColumn: 'embedding', + idColumn: 'id', + metadataColumns: ['title', 'category', 'source'], + metadataJsonColumn: 'metadata', + }, + ]), + ], + }); + ``` + + ### Prerequisites + + 1. **Google Cloud Project**: Set up a Google Cloud project + 2. 
**Cloud SQL Instance**: Create a PostgreSQL instance with pgvector extension
+    3. **Authentication**: Configure Google Cloud authentication
+    4. **Network Access**: Configure VPC or authorized networks
+
+    ### Cloud SQL Instance Setup
+
+    ```bash
+    # Create Cloud SQL PostgreSQL instance
+    gcloud sql instances create my-instance \
+      --database-version=POSTGRES_15 \
+      --tier=db-standard-2 \
+      --region=us-central1 \
+      --storage-type=SSD \
+      --storage-size=100GB \
+      --database-flags=shared_preload_libraries=vector
+
+    # Create database
+    gcloud sql databases create my-database --instance=my-instance
+
+    # Enable pgvector extension
+    gcloud sql connect my-instance --user=postgres --database=my-database
+    # Then run: CREATE EXTENSION IF NOT EXISTS vector;
+    ```
+  </TabItem>
+  <TabItem label="Go">
+    For Go applications, you can use Cloud SQL through the Google Cloud SQL Go connector:
+
+    ```bash
+    go get cloud.google.com/go/cloudsqlconn
+    go get github.com/lib/pq
+    go get github.com/pgvector/pgvector-go
+    ```
+
+    ```go
+    package main
+
+    import (
+      "context"
+      "database/sql"
+      "log"
+
+      "cloud.google.com/go/cloudsqlconn"
+      "github.com/firebase/genkit/go/genkit"
+      "github.com/firebase/genkit/go/plugins/cloudsql"
+      "github.com/firebase/genkit/go/plugins/vertexai"
+      _ "github.com/lib/pq" // registers the "postgres" database/sql driver
+    )
+
+    func main() {
+      ctx := context.Background()
+
+      // Create Cloud SQL connector
+      d, err := cloudsqlconn.NewDialer(ctx)
+      if err != nil {
+        log.Fatal(err)
+      }
+      defer d.Close()
+
+      // Configure database connection. This key/value DSN assumes a local path to the
+      // instance (for example, the Cloud SQL Auth Proxy); to dial through the connector
+      // instead, register the dialer with your driver per the cloudsqlconn docs.
+      dsn := "user=postgres dbname=my-database sslmode=disable"
+
+      // Connect to Cloud SQL
+      db, err := sql.Open("postgres", dsn)
+      if err != nil {
+        log.Fatal(err)
+      }
+      defer db.Close()
+
+      // Initialize Genkit
+      g, err := genkit.Init(ctx,
+        genkit.WithPlugins(
+          &vertexai.VertexAI{},
+          &cloudsql.CloudSQL{
+            Database: db,
+            Tables: []cloudsql.TableConfig{
+              {
+                Name:     "documents",
+                Embedder: "vertexai/gemini-embedding-001",
+                Schema: cloudsql.TableSchema{
+                  ContentColumn:   "content",
+                  EmbeddingColumn: "embedding",
+                  IDColumn:        "id",
+                  MetadataColumns: []string{"title", "category", "source"},
+                },
+              },
+            },
+          },
+        ),
+      )
+      if err != nil {
+        log.Fatal(err)
+      }
+      _ = g // g is used by the indexing and retrieval examples below
+    }
+    ```
+  </TabItem>
+  <TabItem label="Python">
+    For Python applications, install the Cloud SQL connector:
+
+    ```bash
+    pip install "cloud-sql-python-connector[pg8000]" psycopg2-binary pgvector
+    ```
+
+    ```python
+    import os
+
+    from google.cloud.sql.connector import Connector
+    from pgvector.psycopg2 import register_vector
+    from genkit.ai import Genkit
+    from genkit.plugins.cloudsql import CloudSQL
+    from genkit.plugins.vertexai import VertexAI
+
+    # Initialize Cloud SQL connector
+    def create_connection():
+        connector = Connector()
+
+        def getconn():
+            conn = connector.connect(
+                "my-project:us-central1:my-instance",
+                "pg8000",
+                user="postgres",
+                password=os.getenv("DB_PASSWORD"),
+                db="my-database"
+            )
+            # Note: the connector returns a pg8000 connection, while register_vector
+            # here is pgvector's psycopg2 integration; match the registration helper
+            # to the driver you actually use.
+            register_vector(conn)
+            return conn
+
+        return getconn
+
+    # Initialize Genkit
+    ai = Genkit(
+        plugins=[
+            VertexAI(),
+            CloudSQL(
+                connection_factory=create_connection(),
+                tables=[
+                    {
+                        "name": "documents",
+                        "embedder": "vertexai/gemini-embedding-001",
+                        "schema": {
+                            "content_column": "content",
+                            "embedding_column": "embedding",
+                            "id_column": "id",
+                            "metadata_columns": ["title", "category", "source"],
+                        },
+                    }
+                ],
+            ),
+        ],
+    )
+    ```
+  </TabItem>
+</Tabs>
+
+## Basic Usage
+
+### Document Indexing
+
+<Tabs syncKey="lang">
+  <TabItem label="JavaScript" icon="seti:javascript">
+    Index documents with custom metadata handling:
+
+    ```ts
+    import { postgresIndexerRef } from 'genkitx-cloud-sql-pg';
+    import { Document } from
'genkit'; + + // Create indexer reference + const documentsIndexer = postgresIndexerRef({ + tableName: 'documents', + idColumn: 'id', + metadataColumns: ['title', 'category', 'source'], + }); + + // Prepare documents for indexing + const documents: Document[] = [ + { + content: 'Cloud SQL for PostgreSQL provides managed database services with vector capabilities.', + metadata: { + id: 'doc-1', + title: 'Cloud SQL Overview', + category: 'database', + source: 'documentation', + tags: ['cloud', 'sql', 'postgresql'], + }, + }, + { + content: 'Managed databases reduce operational overhead and provide automatic scaling.', + metadata: { + id: 'doc-2', + title: 'Managed Database Benefits', + category: 'technology', + source: 'blog', + tags: ['managed', 'scaling', 'operations'], + }, + }, + ]; + + // Index documents + await ai.index({ + indexer: documentsIndexer, + documents, + options: { + batchSize: 100, // Process documents in batches + }, + }); + + // Batch indexing for large datasets + const batchSize = 50; + for (let i = 0; i < largeDocumentSet.length; i += batchSize) { + const batch = largeDocumentSet.slice(i, i + batchSize); + await ai.index({ + indexer: documentsIndexer, + documents: batch, + options: { batchSize }, + }); + } + ``` + + + Index documents with custom metadata handling: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func indexDocuments(ctx context.Context) error { + documents := []ai.Document{ + { + Content: "Cloud SQL for PostgreSQL provides managed database services with vector capabilities.", + Metadata: map[string]interface{}{ + "id": "doc-1", + "title": "Cloud SQL Overview", + "category": "database", + "source": "documentation", + "tags": []string{"cloud", "sql", "postgresql"}, + }, + }, + { + Content: "Managed databases reduce operational overhead and provide automatic scaling.", + Metadata: map[string]interface{}{ + "id": "doc-2", + "title": "Managed Database Benefits", + "category": "technology", + "source": "blog", + "tags": []string{"managed", "scaling", "operations"}, + }, + }, + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer("cloudsql/documents"), + ai.WithDocuments(documents), + ai.WithOptions(map[string]interface{}{ + "batchSize": 100, + }), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + + // Batch indexing function + func batchIndexDocuments(ctx context.Context, documents []ai.Document, batchSize int) error { + for i := 0; i < len(documents); i += batchSize { + end := i + batchSize + if end > len(documents) { + end = len(documents) + } + + batch := documents[i:end] + err := genkit.Index(ctx, g, + ai.WithIndexer("cloudsql/documents"), + ai.WithDocuments(batch), + ai.WithOptions(map[string]interface{}{ + "batchSize": batchSize, + }), + ) + if err != nil { + return fmt.Errorf("failed to index batch: %w", err) + } + } + return nil + } + ``` + + + Index documents with custom metadata handling: + + ```python + from typing import List, Dict, Any + + # Prepare documents for indexing + documents = [ + { + "content": "Cloud SQL for PostgreSQL provides managed database services with vector capabilities.", + "metadata": { + "id": "doc-1", + "title": "Cloud SQL Overview", + "category": "database", + "source": "documentation", + "tags": ["cloud", "sql", "postgresql"], + }, + }, + { + "content": "Managed databases reduce operational overhead and provide automatic scaling.", + "metadata": { + "id": "doc-2", + "title": 
"Managed Database Benefits", + "category": "technology", + "source": "blog", + "tags": ["managed", "scaling", "operations"], + }, + }, + ] + + # Index documents + async def index_documents(docs: List[Dict[str, Any]], table_name: str = "documents"): + try: + indexer = f"cloudsql/{table_name}" + + await ai.index( + indexer=indexer, + documents=docs, + options={"batch_size": 100} + ) + + return {"indexed": len(docs), "success": True} + except Exception as error: + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + + # Batch indexing for large datasets + async def batch_index_documents( + docs: List[Dict[str, Any]], + table_name: str = "documents", + batch_size: int = 50 + ): + total_indexed = 0 + + for i in range(0, len(docs), batch_size): + batch = docs[i:i + batch_size] + + try: + await ai.index( + indexer=f"cloudsql/{table_name}", + documents=batch, + options={"batch_size": batch_size} + ) + total_indexed += len(batch) + except Exception as error: + print(f"Batch indexing failed: {error}") + break + + return {"indexed": total_indexed, "success": total_indexed == len(docs)} + ``` + + + +### Document Retrieval + + + + Retrieve documents with advanced filtering and distance strategies: + + ```ts + import { postgresRetrieverRef, DistanceStrategy } from 'genkitx-cloud-sql-pg'; + + // Create retriever reference with distance strategy + const documentsRetriever = postgresRetrieverRef({ + tableName: 'documents', + idColumn: 'id', + metadataColumns: ['title', 'category', 'source'], + distanceStrategy: DistanceStrategy.COSINE_DISTANCE, + }); + + // Basic retrieval + const query = "What are managed database benefits?"; + const docs = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { + k: 5, // Number of documents to retrieve (max 1000) + }, + }); + + console.log('Retrieved documents:', docs); + + // Advanced retrieval with SQL filtering + const filteredDocs = await ai.retrieve({ + retriever: documentsRetriever, + query: "cloud database services", + options: { + k: 3, + filter: "category = 'database' AND source = 'documentation'", + }, + }); + + // Complex filtering with multiple conditions + const complexDocs = await ai.retrieve({ + retriever: documentsRetriever, + query: "database scaling solutions", + options: { + k: 10, + filter: "category IN ('database', 'technology') AND source != 'deprecated'", + }, + }); + + // Different distance strategies + const euclideanRetriever = postgresRetrieverRef({ + tableName: 'documents', + distanceStrategy: DistanceStrategy.EUCLIDEAN_DISTANCE, + }); + + const dotProductRetriever = postgresRetrieverRef({ + tableName: 'documents', + distanceStrategy: DistanceStrategy.DOT_PRODUCT, + }); + ``` + + + Retrieve documents with advanced filtering and distance strategies: + + ```go + // Basic retrieval + func retrieveDocuments(ctx context.Context, query string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("cloudsql/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 5, + }), + ) + if err != nil { + return nil, fmt.Errorf("retrieval failed: %w", err) + } + + return docs, nil + } + + // Advanced retrieval with SQL filtering + func advancedRetrieve(ctx context.Context, query string, limit int, filter string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("cloudsql/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": limit, + "filter": filter, + }), + ) + if err != nil { + return nil, 
fmt.Errorf("advanced retrieval failed: %w", err) + } + + return docs, nil + } + + // Complex filtering examples + func searchDatabaseDocuments(ctx context.Context, query string) ([]ai.Document, error) { + filter := "category = 'database' AND source = 'documentation'" + return advancedRetrieve(ctx, query, 3, filter) + } + + func searchMultiCategoryDocuments(ctx context.Context, query string) ([]ai.Document, error) { + filter := "category IN ('database', 'technology') AND source != 'deprecated'" + return advancedRetrieve(ctx, query, 10, filter) + } + + // Usage example + func performSearches(ctx context.Context) error { + // Basic search + docs, err := retrieveDocuments(ctx, "What are managed database benefits?") + if err != nil { + return err + } + + // Filtered search + dbDocs, err := searchDatabaseDocuments(ctx, "cloud database services") + if err != nil { + return err + } + + // Complex search + multiDocs, err := searchMultiCategoryDocuments(ctx, "database scaling solutions") + if err != nil { + return err + } + + fmt.Printf("Found %d basic, %d database, %d multi-category documents\n", + len(docs), len(dbDocs), len(multiDocs)) + return nil + } + ``` + + + Retrieve documents with advanced filtering and distance strategies: + + ```python + from typing import List, Dict, Any, Optional + + # Basic retrieval + async def retrieve_documents(query: str, table_name: str = "documents", k: int = 5) -> List[Dict[str, Any]]: + try: + retriever = f"cloudsql/{table_name}" + docs = await ai.retrieve( + retriever=retriever, + query=query, + options={"k": k} + ) + return docs + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + + # Advanced retrieval with SQL filtering + async def advanced_retrieve( + query: str, + table_name: str = "documents", + k: int = 5, + filter_clause: Optional[str] = None + ) -> List[Dict[str, Any]]: + try: + retriever = f"cloudsql/{table_name}" + + options = {"k": k} + if filter_clause: + options["filter"] = filter_clause + + docs = await ai.retrieve( + retriever=retriever, + query=query, + options=options + ) + + return docs + except Exception as error: + print(f"Advanced retrieval failed: {error}") + return [] + + # Specific search functions + async def search_database_documents(query: str) -> List[Dict[str, Any]]: + filter_clause = "category = 'database' AND source = 'documentation'" + return await advanced_retrieve(query, k=3, filter_clause=filter_clause) + + async def search_multi_category_documents(query: str) -> List[Dict[str, Any]]: + filter_clause = "category IN ('database', 'technology') AND source != 'deprecated'" + return await advanced_retrieve(query, k=10, filter_clause=filter_clause) + + # Comprehensive search example + async def perform_comprehensive_search(): + # Basic search + basic_docs = await retrieve_documents("What are managed database benefits?", k=5) + + # Database-specific search + db_docs = await search_database_documents("cloud database services") + + # Multi-category search + multi_docs = await search_multi_category_documents("database scaling solutions") + + return { + "basic_search": basic_docs, + "database_search": db_docs, + "multi_category_search": multi_docs, + "total_results": len(basic_docs) + len(db_docs) + len(multi_docs) + } + ``` + + + +## Advanced Features + +### Custom Table Configuration + + + + Configure custom table schemas for specific use cases: + + ```ts + // Advanced table configuration + await engine.initVectorstoreTable('custom_documents', 1536, { + schemaName: 'ai_data', + contentColumn: 
'document_text', + embeddingColumn: 'text_embedding', + idColumn: 'document_id', + metadataColumns: [ + { name: 'title', dataType: 'TEXT' }, + { name: 'author', dataType: 'TEXT' }, + { name: 'created_date', dataType: 'TIMESTAMP' }, + { name: 'version', dataType: 'INTEGER' }, + { name: 'tags', dataType: 'TEXT[]' }, + { name: 'score', dataType: 'REAL' }, + ], + metadataJsonColumn: 'additional_metadata', + storeMetadata: true, + overwriteExisting: false, + }); + + // Configure retriever for custom table + const customRetriever = postgresRetrieverRef({ + tableName: 'custom_documents', + schemaName: 'ai_data', + contentColumn: 'document_text', + embeddingColumn: 'text_embedding', + idColumn: 'document_id', + metadataColumns: ['title', 'author', 'created_date', 'version', 'tags', 'score'], + metadataJsonColumn: 'additional_metadata', + distanceStrategy: DistanceStrategy.COSINE_DISTANCE, + }); + + // Advanced filtering with custom columns + const advancedSearch = await ai.retrieve({ + retriever: customRetriever, + query: "latest documentation updates", + options: { + k: 5, + filter: ` + created_date >= '2024-01-01' + AND version >= 2 + AND score > 0.8 + AND 'technical' = ANY(tags) + `, + }, + }); + ``` + + + Configure custom table schemas for specific use cases: + + ```go + // Custom table configuration + type CustomTableConfig struct { + SchemaName string + ContentColumn string + EmbeddingColumn string + IDColumn string + MetadataColumns []string + MetadataJSONColumn string + DistanceStrategy string + } + + func setupCustomTable(ctx context.Context, db *sql.DB) error { + // Create custom table with advanced schema + createTableSQL := ` + CREATE TABLE IF NOT EXISTS ai_data.custom_documents ( + document_id TEXT PRIMARY KEY, + document_text TEXT NOT NULL, + text_embedding vector(1536), + title TEXT, + author TEXT, + created_date TIMESTAMP, + version INTEGER, + tags TEXT[], + score REAL, + additional_metadata JSONB + ); + + CREATE INDEX IF NOT EXISTS custom_documents_embedding_idx + ON ai_data.custom_documents + USING ivfflat (text_embedding vector_cosine_ops) + WITH (lists = 100); + ` + + _, err := db.ExecContext(ctx, createTableSQL) + return err + } + + // Advanced retrieval with custom filtering + func advancedCustomRetrieve(ctx context.Context, query string) ([]ai.Document, error) { + filter := ` + created_date >= '2024-01-01' + AND version >= 2 + AND score > 0.8 + AND 'technical' = ANY(tags) + ` + + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("cloudsql/custom_documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 5, + "filter": filter, + "schema": "ai_data", + }), + ) + if err != nil { + return nil, fmt.Errorf("custom retrieval failed: %w", err) + } + + return docs, nil + } + ``` + + + Configure custom table schemas for specific use cases: + + ```python + # Custom table configuration + async def setup_custom_table(connection): + """Set up a custom table with advanced schema""" + cursor = connection.cursor() + + try: + # Create custom schema and table + cursor.execute(""" + CREATE SCHEMA IF NOT EXISTS ai_data; + + CREATE TABLE IF NOT EXISTS ai_data.custom_documents ( + document_id TEXT PRIMARY KEY, + document_text TEXT NOT NULL, + text_embedding vector(1536), + title TEXT, + author TEXT, + created_date TIMESTAMP, + version INTEGER, + tags TEXT[], + score REAL, + additional_metadata JSONB + ); + + CREATE INDEX IF NOT EXISTS custom_documents_embedding_idx + ON ai_data.custom_documents + USING ivfflat (text_embedding vector_cosine_ops) + WITH (lists = 100); 
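            -- NOTE: lists = 100 is only a starting point. A common rule of
            -- thumb is lists of roughly rows / 1000 for tables up to about a
            -- million rows; raising ivfflat.probes at query time then trades
            -- speed for recall.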
+ """) + + connection.commit() + return {"success": True} + except Exception as error: + connection.rollback() + print(f"Custom table setup failed: {error}") + return {"success": False, "error": str(error)} + finally: + cursor.close() + + # Advanced retrieval with custom filtering + async def advanced_custom_retrieve(query: str) -> List[Dict[str, Any]]: + filter_clause = """ + created_date >= '2024-01-01' + AND version >= 2 + AND score > 0.8 + AND 'technical' = ANY(tags) + """ + + try: + retriever = "cloudsql/custom_documents" + docs = await ai.retrieve( + retriever=retriever, + query=query, + options={ + "k": 5, + "filter": filter_clause, + "schema": "ai_data" + } + ) + return docs + except Exception as error: + print(f"Custom retrieval failed: {error}") + return [] + + # Complex metadata search + async def search_by_metadata_criteria( + query: str, + min_score: float = 0.8, + min_version: int = 2, + required_tags: List[str] = None, + date_range: tuple = None + ) -> List[Dict[str, Any]]: + """Search with complex metadata criteria""" + + filter_parts = [f"score >= {min_score}", f"version >= {min_version}"] + + if required_tags: + tag_conditions = " OR ".join([f"'{tag}' = ANY(tags)" for tag in required_tags]) + filter_parts.append(f"({tag_conditions})") + + if date_range: + start_date, end_date = date_range + filter_parts.append(f"created_date BETWEEN '{start_date}' AND '{end_date}'") + + filter_clause = " AND ".join(filter_parts) + + return await advanced_retrieve( + query=query, + table_name="custom_documents", + k=10, + filter_clause=filter_clause + ) + ``` + + + +## Best Practices + +### Database Configuration + +1. **Instance Sizing**: Choose appropriate machine types for your workload +2. **Storage Configuration**: Use SSD storage for better performance +3. **Connection Pooling**: Configure connection pooling for high-traffic applications +4. **Backup Strategy**: Set up automated backups and point-in-time recovery + +### Vector Optimization + +1. **Index Configuration**: Optimize pgvector index parameters for your data +2. **Embedding Dimensions**: Match vector dimensions to your embedding model +3. **Distance Strategy**: Choose the right distance function for your use case +4. **Batch Operations**: Use appropriate batch sizes for indexing + +### Performance Optimization + +1. **Query Optimization**: Use efficient SQL filters to reduce search space +2. **Index Management**: Monitor and maintain vector indexes +3. **Connection Management**: Use connection pooling and proper connection lifecycle +4. **Monitoring**: Set up Cloud Monitoring for database metrics + +### Production Deployment + +1. **High Availability**: Configure regional persistent disks and failover replicas +2. **Security**: Use private IP, SSL connections, and IAM authentication +3. **Scaling**: Configure read replicas for read-heavy workloads +4. **Maintenance**: Schedule maintenance windows and updates + +### Cost Optimization + +1. **Right-sizing**: Monitor resource usage and adjust instance sizes +2. **Storage Management**: Use appropriate storage types and sizes +3. **Connection Efficiency**: Minimize connection overhead +4. 
**Query Efficiency**: Optimize queries to reduce compute costs + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with managed vector search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/content/docs/unified-docs/vector-databases/lancedb.mdx b/src/content/docs/unified-docs/vector-databases/lancedb.mdx new file mode 100644 index 00000000..6bf4898b --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/lancedb.mdx @@ -0,0 +1,913 @@ +--- +title: LanceDB Vector Database +description: Learn how to use LanceDB with Genkit across JavaScript, Go, and Python for embedded vector storage, semantic search, and RAG applications. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +LanceDB is an open-source vector database designed for AI applications. It provides embedded vector storage with high performance, making it ideal for applications that need fast vector similarity search without the complexity of managing a separate database server. + +## Installation and Setup + + + + Install the LanceDB plugin: + + ```bash + npm install genkitx-lancedb + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { lancedb } from 'genkitx-lancedb'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [ + googleAI(), + lancedb([ + { + dbUri: '.db', // Database directory + tableName: 'documents', + embedder: googleAI.embedder('gemini-embedding-001'), + }, + ]), + ], + }); + ``` + + ### Configuration Options + + ```ts + // Advanced configuration + const ai = genkit({ + plugins: [ + googleAI(), + lancedb([ + { + dbUri: './vector-db', // Custom database directory + tableName: 'my-documents', + embedder: googleAI.embedder('gemini-embedding-001'), + // Additional LanceDB options can be specified here + }, + ]), + ], + }); + ``` + + + For Go applications, you can use LanceDB through the Go client: + + ```bash + go get github.com/lancedb/lancedb-go + ``` + + ```go + package main + + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/lancedb" + "github.com/firebase/genkit/go/plugins/googleai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googleai.GoogleAI{}, + &lancedb.LanceDB{ + DatabaseURI: "./vector-db", + Tables: []lancedb.TableConfig{ + { + Name: "documents", + Embedder: "googleai/gemini-embedding-001", + }, + }, + }, + ), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + For Python applications, install the LanceDB client: + + ```bash + pip install lancedb genkit-plugin-lancedb + ``` + + ```python + from genkit.ai import Genkit + from genkit.plugins.lancedb import LanceDB + from genkit.plugins.google_genai import GoogleGenAI + + ai = Genkit( + plugins=[ + GoogleGenAI(), + LanceDB( + database_uri="./vector-db", + tables=[ + { + "name": "documents", + "embedder": "googleai/gemini-embedding-001", + } + ], + ), + ], + ) + ``` + + + +## Basic Usage + +### Document Indexing + + + + Index documents for semantic search: + + ```ts + import { lancedbIndexerRef, WriteMode } from 'genkitx-lancedb'; + import { Document } from 'genkit'; + + // Create indexer reference + const documentsIndexer = 
lancedbIndexerRef({ + tableName: 'documents', + }); + + // Prepare documents for indexing + const documents: Document[] = [ + { + content: 'LanceDB is an open-source vector database for AI applications.', + metadata: { + title: 'LanceDB Overview', + category: 'database', + source: 'documentation', + }, + }, + { + content: 'Embedded vector databases provide fast local search capabilities.', + metadata: { + title: 'Embedded Databases', + category: 'technology', + source: 'blog', + }, + }, + ]; + + // Index documents + await ai.index({ + indexer: documentsIndexer, + documents, + options: { + writeMode: WriteMode.Overwrite, // or WriteMode.Append + }, + }); + + // Batch indexing for better performance + const batchSize = 100; + for (let i = 0; i < largeDocumentSet.length; i += batchSize) { + const batch = largeDocumentSet.slice(i, i + batchSize); + await ai.index({ + indexer: documentsIndexer, + documents: batch, + options: { + writeMode: i === 0 ? WriteMode.Overwrite : WriteMode.Append, + }, + }); + } + ``` + + + Index documents for semantic search: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func indexDocuments(ctx context.Context) error { + documents := []ai.Document{ + { + Content: "LanceDB is an open-source vector database for AI applications.", + Metadata: map[string]interface{}{ + "title": "LanceDB Overview", + "category": "database", + "source": "documentation", + }, + }, + { + Content: "Embedded vector databases provide fast local search capabilities.", + Metadata: map[string]interface{}{ + "title": "Embedded Databases", + "category": "technology", + "source": "blog", + }, + }, + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer("lancedb/documents"), + ai.WithDocuments(documents), + ai.WithOptions(map[string]interface{}{ + "writeMode": "overwrite", // or "append" + }), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + + // Batch indexing + func batchIndexDocuments(ctx context.Context, documents []ai.Document, batchSize int) error { + for i := 0; i < len(documents); i += batchSize { + end := i + batchSize + if end > len(documents) { + end = len(documents) + } + + batch := documents[i:end] + writeMode := "append" + if i == 0 { + writeMode = "overwrite" + } + + err := genkit.Index(ctx, g, + ai.WithIndexer("lancedb/documents"), + ai.WithDocuments(batch), + ai.WithOptions(map[string]interface{}{ + "writeMode": writeMode, + }), + ) + if err != nil { + return fmt.Errorf("failed to index batch: %w", err) + } + } + return nil + } + ``` + + + Index documents for semantic search: + + ```python + from typing import List, Dict, Any + + # Prepare documents for indexing + documents = [ + { + "content": "LanceDB is an open-source vector database for AI applications.", + "metadata": { + "title": "LanceDB Overview", + "category": "database", + "source": "documentation", + }, + }, + { + "content": "Embedded vector databases provide fast local search capabilities.", + "metadata": { + "title": "Embedded Databases", + "category": "technology", + "source": "blog", + }, + }, + ] + + # Index documents + async def index_documents(docs: List[Dict[str, Any]], table_name: str = "documents"): + try: + indexer = f"lancedb/{table_name}" + + await ai.index( + indexer=indexer, + documents=docs, + options={ + "write_mode": "overwrite" # or "append" + } + ) + + return {"indexed": len(docs), "success": True} + except Exception as error: + print(f"Indexing failed: 
{error}") + return {"indexed": 0, "success": False} + + # Batch indexing for better performance + async def batch_index_documents( + docs: List[Dict[str, Any]], + table_name: str = "documents", + batch_size: int = 100 + ): + total_indexed = 0 + + for i in range(0, len(docs), batch_size): + batch = docs[i:i + batch_size] + write_mode = "overwrite" if i == 0 else "append" + + try: + await ai.index( + indexer=f"lancedb/{table_name}", + documents=batch, + options={"write_mode": write_mode} + ) + total_indexed += len(batch) + except Exception as error: + print(f"Batch indexing failed: {error}") + break + + return {"indexed": total_indexed, "success": total_indexed == len(docs)} + ``` + + + +### Document Retrieval + + + + Retrieve relevant documents using semantic search: + + ```ts + import { lancedbRetrieverRef } from 'genkitx-lancedb'; + + // Create retriever reference + const documentsRetriever = lancedbRetrieverRef({ + tableName: 'documents', + displayName: 'Documents', + }); + + // Basic retrieval + const query = "What is an embedded vector database?"; + const docs = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { + k: 5, // Number of documents to retrieve + }, + }); + + console.log('Retrieved documents:', docs); + + // Advanced retrieval with filtering + const filteredDocs = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { + k: 3, + filter: { + category: 'database', // Metadata filtering + }, + }, + }); + ``` + + + Retrieve relevant documents using semantic search: + + ```go + // Basic retrieval + func retrieveDocuments(ctx context.Context, query string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("lancedb/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 5, + }), + ) + if err != nil { + return nil, fmt.Errorf("retrieval failed: %w", err) + } + + return docs, nil + } + + // Advanced retrieval with filtering + func advancedRetrieve(ctx context.Context, query string, limit int, filter map[string]interface{}) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("lancedb/documents"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": limit, + "filter": filter, + }), + ) + if err != nil { + return nil, fmt.Errorf("advanced retrieval failed: %w", err) + } + + return docs, nil + } + + // Usage example + func searchDocuments(ctx context.Context) error { + // Basic search + docs, err := retrieveDocuments(ctx, "What is an embedded vector database?") + if err != nil { + return err + } + + // Filtered search + filteredDocs, err := advancedRetrieve(ctx, + "database concepts", + 3, + map[string]interface{}{ + "category": "database", + }, + ) + if err != nil { + return err + } + + fmt.Printf("Found %d documents, %d filtered\n", len(docs), len(filteredDocs)) + return nil + } + ``` + + + Retrieve relevant documents using semantic search: + + ```python + from typing import List, Dict, Any, Optional + + # Basic retrieval + async def retrieve_documents(query: str, table_name: str = "documents", k: int = 5) -> List[Dict[str, Any]]: + try: + retriever = f"lancedb/{table_name}" + docs = await ai.retrieve( + retriever=retriever, + query=query, + options={"k": k} + ) + return docs + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + + # Advanced retrieval with filtering + async def advanced_retrieve( + query: str, + table_name: str = "documents", + k: int = 5, + filter_criteria: Optional[Dict[str, Any]] = None + ) 
-> List[Dict[str, Any]]: + try: + retriever = f"lancedb/{table_name}" + + options = {"k": k} + if filter_criteria: + options["filter"] = filter_criteria + + docs = await ai.retrieve( + retriever=retriever, + query=query, + options=options + ) + + return docs + except Exception as error: + print(f"Advanced retrieval failed: {error}") + return [] + + # Usage examples + async def search_examples(): + # Basic search + docs = await retrieve_documents("What is an embedded vector database?", k=5) + + # Filtered search + filtered_docs = await advanced_retrieve( + query="database concepts", + k=3, + filter_criteria={"category": "database"} + ) + + print(f"Found {len(docs)} documents, {len(filtered_docs)} filtered") + return docs, filtered_docs + ``` + + + +## Advanced Features + +### Complete RAG Implementation + + + + Build a complete RAG system with document processing: + + ```ts + import { lancedbIndexerRef, lancedbRetrieverRef, WriteMode } from 'genkitx-lancedb'; + import { chunk } from 'llm-chunk'; + import { readFile } from 'fs/promises'; + import pdf from 'pdf-parse'; + + // Document processing configuration + const chunkingConfig = { + minLength: 1000, + maxLength: 2000, + splitter: 'sentence', + overlap: 100, + }; + + // PDF text extraction + async function extractTextFromPdf(filePath: string): Promise { + const dataBuffer = await readFile(filePath); + const data = await pdf(dataBuffer); + return data.text; + } + + // Document indexing flow + export const indexDocumentFlow = ai.defineFlow( + { + name: 'indexDocument', + inputSchema: z.object({ + filePath: z.string(), + tableName: z.string().optional().default('documents'), + }), + outputSchema: z.object({ + success: z.boolean(), + documentsIndexed: z.number(), + error: z.string().optional(), + }), + }, + async ({ filePath, tableName }) => { + try { + // Extract text from PDF + const pdfText = await ai.run('extract-text', () => + extractTextFromPdf(filePath) + ); + + // Chunk the text + const chunks = await ai.run('chunk-text', () => + chunk(pdfText, chunkingConfig) + ); + + // Convert to documents + const documents = chunks.map((text, index) => ({ + content: text, + metadata: { + filePath, + chunkIndex: index, + source: 'pdf', + }, + })); + + // Index documents + const indexer = lancedbIndexerRef({ tableName }); + await ai.index({ + indexer, + documents, + options: { writeMode: WriteMode.Overwrite }, + }); + + return { + success: true, + documentsIndexed: documents.length, + }; + } catch (error) { + return { + success: false, + documentsIndexed: 0, + error: error instanceof Error ? error.message : String(error), + }; + } + }, + ); + + // RAG query flow + export const ragQueryFlow = ai.defineFlow( + { + name: 'ragQuery', + inputSchema: z.object({ + query: z.string(), + tableName: z.string().optional().default('documents'), + }), + outputSchema: z.object({ + answer: z.string(), + sources: z.array(z.string()), + }), + }, + async ({ query, tableName }) => { + // Retrieve relevant documents + const retriever = lancedbRetrieverRef({ tableName }); + const docs = await ai.retrieve({ + retriever, + query, + options: { k: 3 }, + }); + + // Generate answer using retrieved context + const { text } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: ` + Answer the following question using only the provided context. + If you cannot answer based on the context, say so. 
+ + Context: + ${docs.map(doc => doc.content).join('\n\n')} + + Question: ${query} + `, + }); + + return { + answer: text, + sources: docs.map(doc => doc.metadata?.filePath || 'unknown').filter(Boolean), + }; + }, + ); + ``` + + + Build a complete RAG system with document processing: + + ```go + import ( + "context" + "fmt" + "strings" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + // Document processing and indexing + func indexDocumentFromText(ctx context.Context, text, source string, tableName string) error { + // Simple text chunking (in production, use a proper chunking library) + chunks := chunkText(text, 1000, 100) + + var documents []ai.Document + for i, chunk := range chunks { + documents = append(documents, ai.Document{ + Content: chunk, + Metadata: map[string]interface{}{ + "source": source, + "chunkIndex": i, + }, + }) + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer(fmt.Sprintf("lancedb/%s", tableName)), + ai.WithDocuments(documents), + ai.WithOptions(map[string]interface{}{ + "writeMode": "overwrite", + }), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + + // Simple text chunking function + func chunkText(text string, chunkSize, overlap int) []string { + words := strings.Fields(text) + var chunks []string + + for i := 0; i < len(words); i += chunkSize - overlap { + end := i + chunkSize + if end > len(words) { + end = len(words) + } + + chunk := strings.Join(words[i:end], " ") + chunks = append(chunks, chunk) + + if end == len(words) { + break + } + } + + return chunks + } + + // RAG query function + func performRAGQuery(ctx context.Context, query, tableName string) (string, []string, error) { + // Retrieve relevant documents + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever(fmt.Sprintf("lancedb/%s", tableName)), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 3, + }), + ) + if err != nil { + return "", nil, fmt.Errorf("retrieval failed: %w", err) + } + + // Build context from retrieved documents + var contextParts []string + var sources []string + for _, doc := range docs { + contextParts = append(contextParts, doc.Content) + if source, ok := doc.Metadata["source"].(string); ok { + sources = append(sources, source) + } + } + context := strings.Join(contextParts, "\n\n") + + // Generate answer + prompt := fmt.Sprintf(` + Answer the following question using only the provided context. + If you cannot answer based on the context, say so. 
+ + Context: + %s + + Question: %s + `, context, query) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("googleai/gemini-2.5-flash"), + ai.WithPrompt(prompt), + ) + if err != nil { + return "", nil, fmt.Errorf("generation failed: %w", err) + } + + return resp.Text(), sources, nil + } + ``` + + + Build a complete RAG system with document processing: + + ```python + import re + from typing import List, Dict, Any, Tuple + + # Simple text chunking function + def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 100) -> List[str]: + words = text.split() + chunks = [] + + for i in range(0, len(words), chunk_size - overlap): + end = min(i + chunk_size, len(words)) + chunk = ' '.join(words[i:end]) + chunks.append(chunk) + + if end == len(words): + break + + return chunks + + # Document indexing function + async def index_document_from_text( + text: str, + source: str, + table_name: str = "documents" + ) -> Dict[str, Any]: + try: + # Chunk the text + chunks = chunk_text(text, chunk_size=1000, overlap=100) + + # Convert to documents + documents = [ + { + "content": chunk, + "metadata": { + "source": source, + "chunk_index": i, + } + } + for i, chunk in enumerate(chunks) + ] + + # Index documents + await ai.index( + indexer=f"lancedb/{table_name}", + documents=documents, + options={"write_mode": "overwrite"} + ) + + return { + "success": True, + "documents_indexed": len(documents), + } + except Exception as error: + return { + "success": False, + "documents_indexed": 0, + "error": str(error), + } + + # RAG query function + async def perform_rag_query( + query: str, + table_name: str = "documents", + k: int = 3 + ) -> Tuple[str, List[str]]: + try: + # Retrieve relevant documents + docs = await ai.retrieve( + retriever=f"lancedb/{table_name}", + query=query, + options={"k": k} + ) + + # Build context and collect sources + context_parts = [doc["content"] for doc in docs] + context = "\n\n".join(context_parts) + + sources = [ + doc.get("metadata", {}).get("source", "unknown") + for doc in docs + ] + + # Generate answer + prompt = f""" + Answer the following question using only the provided context. + If you cannot answer based on the context, say so. + + Context: + {context} + + Question: {query} + """ + + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt=prompt + ) + + return response.text, list(set(sources)) # Remove duplicates + + except Exception as error: + print(f"RAG query failed: {error}") + return "I'm sorry, I couldn't process your query.", [] + + # Complete RAG workflow example + async def rag_workflow_example(): + # Index a document + sample_text = """ + LanceDB is an open-source vector database that provides embedded + vector storage capabilities. It's designed for AI applications + that need fast similarity search without managing a separate + database server. + """ + + index_result = await index_document_from_text( + text=sample_text, + source="lancedb_overview.txt", + table_name="knowledge_base" + ) + + if index_result["success"]: + # Query the indexed documents + answer, sources = await perform_rag_query( + query="What is LanceDB?", + table_name="knowledge_base" + ) + + return { + "answer": answer, + "sources": sources, + "indexed_documents": index_result["documents_indexed"] + } + else: + return {"error": "Failed to index documents"} + ``` + + + +## Best Practices + +### Performance Optimization + +1. **Batch operations**: Index documents in batches for better performance +2. 
**Appropriate chunk sizes**: Balance between context and retrieval precision +3. **Embedding model selection**: Choose models that match your use case +4. **Database location**: Use local storage for development, consider cloud storage for production + +### Data Management + +1. **Write modes**: Use `Overwrite` for complete rebuilds, `Append` for incremental updates +2. **Metadata design**: Structure metadata for effective filtering +3. **Version control**: Track document versions and updates +4. **Backup strategies**: Regular backups of the database directory + +### Production Deployment + +1. **Database persistence**: Ensure database directory is persistent in containerized environments +2. **Resource allocation**: Allocate sufficient memory for large datasets +3. **Monitoring**: Track query performance and database size +4. **Scaling**: Consider partitioning large datasets across multiple tables + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with vector search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/content/docs/unified-docs/vector-databases/neo4j.mdx b/src/content/docs/unified-docs/vector-databases/neo4j.mdx new file mode 100644 index 00000000..c258dbea --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/neo4j.mdx @@ -0,0 +1,726 @@ +--- +title: Neo4j Graph Vector Database +description: Learn how to use Neo4j with Genkit across JavaScript, Go, and Python for graph-based vector storage, semantic search, and knowledge graph applications. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Neo4j is a graph database that combines the power of graph relationships with vector search capabilities. It enables you to store documents as nodes with vector embeddings while maintaining rich relationships between entities, making it ideal for knowledge graphs, recommendation systems, and complex AI applications that need both semantic search and graph traversal. + +## Installation and Setup + + + + Install the Neo4j plugin: + + ```bash + npm install genkitx-neo4j + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { neo4j } from 'genkitx-neo4j'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [ + googleAI(), + neo4j([ + { + indexId: 'documents-index', + embedder: googleAI.embedder('gemini-embedding-001'), + clientParams: { + url: process.env.NEO4J_URI || 'bolt://localhost:7687', + username: process.env.NEO4J_USERNAME || 'neo4j', + password: process.env.NEO4J_PASSWORD, + database: process.env.NEO4J_DATABASE || 'neo4j', + }, + }, + ]), + ], + }); + ``` + + ### Environment Variables + + ```bash + export NEO4J_URI=bolt://localhost:7687 + export NEO4J_USERNAME=neo4j + export NEO4J_PASSWORD=your_password + export NEO4J_DATABASE=neo4j + ``` + + ### Prerequisites + + 1. **Neo4j Database**: Install Neo4j Desktop, Neo4j AuraDB, or run Neo4j in Docker + 2. **Vector Index**: Create a vector index in your Neo4j database + 3. 
**Credentials**: Configure authentication credentials + + ### Creating Vector Index in Neo4j + + ```cypher + // Create a vector index for document embeddings + CREATE VECTOR INDEX documents_vector_index + FOR (n:Document) + ON n.embedding + OPTIONS {indexConfig: { + `vector.dimensions`: 768, + `vector.similarity_function`: 'cosine' + }} + ``` + + + For Go applications, you can use Neo4j through the official Go driver: + + ```bash + go get github.com/neo4j/neo4j-go-driver/v5/neo4j + ``` + + ```go + package main + + import ( + "context" + "os" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/neo4j" + "github.com/firebase/genkit/go/plugins/googleai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googleai.GoogleAI{}, + &neo4j.Neo4j{ + URI: os.Getenv("NEO4J_URI"), + Username: os.Getenv("NEO4J_USERNAME"), + Password: os.Getenv("NEO4J_PASSWORD"), + Database: os.Getenv("NEO4J_DATABASE"), + Indexes: []neo4j.IndexConfig{ + { + Name: "documents-index", + Embedder: "googleai/gemini-embedding-001", + }, + }, + }, + ), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + For Python applications, install the Neo4j driver: + + ```bash + pip install neo4j genkit-plugin-neo4j + ``` + + ```python + import os + from genkit.ai import Genkit + from genkit.plugins.neo4j import Neo4j + from genkit.plugins.google_genai import GoogleGenAI + + ai = Genkit( + plugins=[ + GoogleGenAI(), + Neo4j( + uri=os.getenv("NEO4J_URI", "bolt://localhost:7687"), + username=os.getenv("NEO4J_USERNAME", "neo4j"), + password=os.getenv("NEO4J_PASSWORD"), + database=os.getenv("NEO4J_DATABASE", "neo4j"), + indexes=[ + { + "name": "documents-index", + "embedder": "googleai/gemini-embedding-001", + } + ], + ), + ], + ) + ``` + + + +## Basic Usage + +### Document Indexing + + + + Index documents as graph nodes with vector embeddings: + + ```ts + import { neo4jIndexerRef } from 'genkitx-neo4j'; + import { Document } from 'genkit'; + + // Create indexer reference + const documentsIndexer = neo4jIndexerRef({ + indexId: 'documents-index', + }); + + // Prepare documents for indexing + const documents: Document[] = [ + { + content: 'Neo4j combines graph database capabilities with vector search.', + metadata: { + title: 'Neo4j Overview', + category: 'database', + author: 'Neo4j Team', + tags: ['graph', 'vector', 'database'], + nodeId: 'doc-1', + }, + }, + { + content: 'Knowledge graphs represent entities and their relationships.', + metadata: { + title: 'Knowledge Graphs', + category: 'technology', + author: 'AI Researcher', + tags: ['knowledge', 'graph', 'ai'], + nodeId: 'doc-2', + }, + }, + ]; + + // Index documents + await ai.index({ + indexer: documentsIndexer, + documents, + }); + + // Create relationships between documents + const createRelationships = async () => { + // This would typically be done through direct Neo4j queries + // after indexing to establish relationships between nodes + console.log('Documents indexed as graph nodes with embeddings'); + }; + ``` + + + Index documents as graph nodes with vector embeddings: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func indexDocuments(ctx context.Context) error { + documents := []ai.Document{ + { + Content: "Neo4j combines graph database capabilities with vector search.", + Metadata: map[string]interface{}{ + "title": "Neo4j Overview", + "category": "database", + "author": "Neo4j Team", + "tags": []string{"graph", 
"vector", "database"}, + "nodeId": "doc-1", + }, + }, + { + Content: "Knowledge graphs represent entities and their relationships.", + Metadata: map[string]interface{}{ + "title": "Knowledge Graphs", + "category": "technology", + "author": "AI Researcher", + "tags": []string{"knowledge", "graph", "ai"}, + "nodeId": "doc-2", + }, + }, + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer("neo4j/documents-index"), + ai.WithDocuments(documents), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + + // Create relationships between indexed documents + func createDocumentRelationships(ctx context.Context) error { + // This would typically involve direct Neo4j queries + // to establish relationships between document nodes + fmt.Println("Documents indexed as graph nodes with embeddings") + return nil + } + ``` + + + Index documents as graph nodes with vector embeddings: + + ```python + from typing import List, Dict, Any + + # Prepare documents for indexing + documents = [ + { + "content": "Neo4j combines graph database capabilities with vector search.", + "metadata": { + "title": "Neo4j Overview", + "category": "database", + "author": "Neo4j Team", + "tags": ["graph", "vector", "database"], + "node_id": "doc-1", + }, + }, + { + "content": "Knowledge graphs represent entities and their relationships.", + "metadata": { + "title": "Knowledge Graphs", + "category": "technology", + "author": "AI Researcher", + "tags": ["knowledge", "graph", "ai"], + "node_id": "doc-2", + }, + }, + ] + + # Index documents + async def index_documents(docs: List[Dict[str, Any]], index_name: str = "documents-index"): + try: + indexer = f"neo4j/{index_name}" + + await ai.index( + indexer=indexer, + documents=docs + ) + + return {"indexed": len(docs), "success": True} + except Exception as error: + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + + # Create relationships between documents + async def create_document_relationships(): + # This would typically involve direct Neo4j queries + # to establish relationships between document nodes + print("Documents indexed as graph nodes with embeddings") + return {"relationships_created": True} + ``` + + + +### Document Retrieval + + + + Retrieve relevant documents using vector similarity: + + ```ts + import { neo4jRetrieverRef } from 'genkitx-neo4j'; + + // Create retriever reference + const documentsRetriever = neo4jRetrieverRef({ + indexId: 'documents-index', + displayName: 'Knowledge Base', + }); + + // Basic retrieval + const query = "What is a knowledge graph?"; + const docs = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { + k: 5, // Number of documents to retrieve (max 1000) + }, + }); + + console.log('Retrieved documents:', docs); + + // Advanced retrieval with custom scoring + const advancedDocs = await ai.retrieve({ + retriever: documentsRetriever, + query: "graph database relationships", + options: { + k: 3, + // Additional Neo4j-specific options can be passed here + }, + }); + + // Retrieve with specific author filter (would require custom Cypher) + const authorSpecificRetriever = neo4jRetrieverRef({ + indexId: 'documents-index', + displayName: 'Author-Specific Search', + }); + ``` + + + Retrieve relevant documents using vector similarity: + + ```go + // Basic retrieval + func retrieveDocuments(ctx context.Context, query string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("neo4j/documents-index"), + 
ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 5, + }), + ) + if err != nil { + return nil, fmt.Errorf("retrieval failed: %w", err) + } + + return docs, nil + } + + // Advanced retrieval with custom options + func advancedRetrieve(ctx context.Context, query string, limit int) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("neo4j/documents-index"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": limit, + // Additional Neo4j-specific options + }), + ) + if err != nil { + return nil, fmt.Errorf("advanced retrieval failed: %w", err) + } + + return docs, nil + } + + // Usage example + func searchKnowledgeBase(ctx context.Context) error { + // Basic search + docs, err := retrieveDocuments(ctx, "What is a knowledge graph?") + if err != nil { + return err + } + + // Advanced search + advancedDocs, err := advancedRetrieve(ctx, "graph database relationships", 3) + if err != nil { + return err + } + + fmt.Printf("Found %d documents, %d advanced results\n", len(docs), len(advancedDocs)) + return nil + } + ``` + + + Retrieve relevant documents using vector similarity: + + ```python + from typing import List, Dict, Any, Optional + + # Basic retrieval + async def retrieve_documents(query: str, index_name: str = "documents-index", k: int = 5) -> List[Dict[str, Any]]: + try: + retriever = f"neo4j/{index_name}" + docs = await ai.retrieve( + retriever=retriever, + query=query, + options={"k": k} + ) + return docs + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + + # Advanced retrieval with custom options + async def advanced_retrieve( + query: str, + index_name: str = "documents-index", + k: int = 5, + custom_options: Optional[Dict[str, Any]] = None + ) -> List[Dict[str, Any]]: + try: + retriever = f"neo4j/{index_name}" + + options = {"k": k} + if custom_options: + options.update(custom_options) + + docs = await ai.retrieve( + retriever=retriever, + query=query, + options=options + ) + + return docs + except Exception as error: + print(f"Advanced retrieval failed: {error}") + return [] + + # Usage examples + async def search_knowledge_base(): + # Basic search + docs = await retrieve_documents("What is a knowledge graph?", k=5) + + # Advanced search + advanced_docs = await advanced_retrieve( + query="graph database relationships", + k=3, + custom_options={"include_metadata": True} + ) + + print(f"Found {len(docs)} documents, {len(advanced_docs)} advanced results") + return docs, advanced_docs + ``` + + + +## Advanced Features + +### Graph-Enhanced Retrieval + + + + Combine vector search with graph traversal for enhanced results: + + ```ts + // Custom retrieval that combines vector search with graph relationships + const graphEnhancedRetrieval = async (query: string) => { + // First, perform vector search + const vectorResults = await ai.retrieve({ + retriever: documentsRetriever, + query, + options: { k: 3 }, + }); + + // Then, use Neo4j driver directly for graph traversal + // This would require additional Neo4j driver setup + const enhancedResults = vectorResults.map(doc => ({ + ...doc, + relatedNodes: [], // Would be populated by graph traversal + })); + + return enhancedResults; + }; + + // Knowledge graph construction + const buildKnowledgeGraph = async (documents: Document[]) => { + // Index documents first + await ai.index({ + indexer: documentsIndexer, + documents, + }); + + // Create relationships based on content similarity or metadata + // This would involve direct Cypher queries to Neo4j 
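    // A hypothetical sketch with the official neo4j-driver, reusing the
    // nodeId metadata set at indexing time:
    //   await session.run(
    //     `MATCH (a:Document {nodeId: $from}), (b:Document {nodeId: $to})
    //      MERGE (a)-[:RELATED_TO]->(b)`,
    //     { from: 'doc-1', to: 'doc-2' }
    //   );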
+ console.log('Knowledge graph constructed with vector-enabled nodes'); + }; + + // Entity relationship extraction + const extractEntityRelationships = async (text: string) => { + // Use AI to extract entities and relationships + const { text: entities } = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Extract entities and relationships from: ${text} + + Format as JSON with entities and relationships arrays.`, + }); + + return JSON.parse(entities); + }; + ``` + + + Combine vector search with graph traversal for enhanced results: + + ```go + import ( + "encoding/json" + "fmt" + "github.com/neo4j/neo4j-go-driver/v5/neo4j" + ) + + // Graph-enhanced retrieval combining vector search with graph traversal + func graphEnhancedRetrieval(ctx context.Context, query string) ([]map[string]interface{}, error) { + // First, perform vector search + vectorResults, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("neo4j/documents-index"), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{"k": 3}), + ) + if err != nil { + return nil, fmt.Errorf("vector search failed: %w", err) + } + + // Enhance with graph traversal (requires Neo4j driver) + enhancedResults := make([]map[string]interface{}, len(vectorResults)) + for i, doc := range vectorResults { + enhancedResults[i] = map[string]interface{}{ + "document": doc, + "relatedNodes": []interface{}{}, // Would be populated by graph traversal + } + } + + return enhancedResults, nil + } + + // Knowledge graph construction + func buildKnowledgeGraph(ctx context.Context, documents []ai.Document) error { + // Index documents first + err := genkit.Index(ctx, g, + ai.WithIndexer("neo4j/documents-index"), + ai.WithDocuments(documents), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + // Create relationships based on content similarity or metadata + // This would involve direct Cypher queries to Neo4j + fmt.Println("Knowledge graph constructed with vector-enabled nodes") + return nil + } + + // Entity relationship extraction + func extractEntityRelationships(ctx context.Context, text string) (map[string]interface{}, error) { + prompt := fmt.Sprintf(`Extract entities and relationships from: %s + + Format as JSON with entities and relationships arrays.`, text) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("googleai/gemini-2.5-flash"), + ai.WithPrompt(prompt), + ) + if err != nil { + return nil, fmt.Errorf("entity extraction failed: %w", err) + } + + var result map[string]interface{} + err = json.Unmarshal([]byte(resp.Text()), &result) + if err != nil { + return nil, fmt.Errorf("failed to parse entities: %w", err) + } + + return result, nil + } + ``` + + + Combine vector search with graph traversal for enhanced results: + + ```python + import json + from typing import List, Dict, Any + + # Graph-enhanced retrieval combining vector search with graph traversal + async def graph_enhanced_retrieval(query: str) -> List[Dict[str, Any]]: + try: + # First, perform vector search + vector_results = await retrieve_documents(query, k=3) + + # Enhance with graph traversal (would require Neo4j driver) + enhanced_results = [] + for doc in vector_results: + enhanced_doc = { + "document": doc, + "related_nodes": [], # Would be populated by graph traversal + } + enhanced_results.append(enhanced_doc) + + return enhanced_results + except Exception as error: + print(f"Graph-enhanced retrieval failed: {error}") + return [] + + # Knowledge graph construction + async def build_knowledge_graph(documents: 
List[Dict[str, Any]]) -> Dict[str, Any]: + try: + # Index documents first + index_result = await index_documents(documents) + + if index_result["success"]: + # Create relationships based on content similarity or metadata + # This would involve direct Cypher queries to Neo4j + print("Knowledge graph constructed with vector-enabled nodes") + return {"graph_built": True, "nodes": len(documents)} + else: + return {"graph_built": False, "error": "Failed to index documents"} + except Exception as error: + print(f"Knowledge graph construction failed: {error}") + return {"graph_built": False, "error": str(error)} + + # Entity relationship extraction + async def extract_entity_relationships(text: str) -> Dict[str, Any]: + try: + prompt = f"""Extract entities and relationships from: {text} + + Format as JSON with entities and relationships arrays.""" + + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt=prompt + ) + + entities = json.loads(response.text) + return entities + except Exception as error: + print(f"Entity extraction failed: {error}") + return {"entities": [], "relationships": []} + + # Comprehensive knowledge graph workflow + async def knowledge_graph_workflow(documents: List[Dict[str, Any]], query: str): + # Build knowledge graph + graph_result = await build_knowledge_graph(documents) + + # Extract entities from query + query_entities = await extract_entity_relationships(query) + + # Perform graph-enhanced retrieval + enhanced_results = await graph_enhanced_retrieval(query) + + return { + "graph_construction": graph_result, + "query_entities": query_entities, + "enhanced_results": enhanced_results + } + ``` + + + +## Best Practices + +### Graph Design + +1. **Node Structure**: Design nodes with meaningful labels and properties +2. **Relationship Types**: Use descriptive relationship types for better traversal +3. **Index Strategy**: Create appropriate vector and property indexes +4. **Schema Design**: Plan your graph schema for optimal query performance + +### Vector Integration + +1. **Embedding Dimensions**: Match vector dimensions to your embedding model +2. **Similarity Functions**: Choose appropriate similarity functions (cosine, euclidean) +3. **Index Configuration**: Optimize vector index settings for your use case +4. **Hybrid Queries**: Combine vector search with graph traversal effectively + +### Performance Optimization + +1. **Query Optimization**: Use efficient Cypher queries for graph operations +2. **Index Management**: Maintain both vector and property indexes +3. **Connection Pooling**: Use connection pooling for better performance +4. **Memory Management**: Monitor memory usage for large graphs + +### Production Deployment + +1. **Clustering**: Use Neo4j clustering for high availability +2. **Backup Strategies**: Implement regular backup procedures +3. **Monitoring**: Monitor query performance and graph metrics +4. 
**Security**: Implement proper authentication and authorization + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with graph-enhanced search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/content/docs/unified-docs/vector-databases/pgvector.mdx b/src/content/docs/unified-docs/vector-databases/pgvector.mdx new file mode 100644 index 00000000..7b09d174 --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/pgvector.mdx @@ -0,0 +1,938 @@ +--- +title: pgvector (PostgreSQL Vector Extension) +description: Learn how to use pgvector with Genkit across JavaScript, Go, and Python for vector storage, semantic search, and RAG applications using PostgreSQL. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +pgvector is a PostgreSQL extension that adds vector similarity search capabilities to PostgreSQL databases. It provides efficient storage and querying of high-dimensional vectors, making it ideal for AI applications that need both relational and vector data in a single database. + +## Installation and Setup + +### PostgreSQL with pgvector Extension + + + + Install the required dependencies: + + ```bash + npm install postgres pgvector + ``` + + Set up your PostgreSQL database with pgvector: + + ```sql + -- Enable the pgvector extension + CREATE EXTENSION IF NOT EXISTS vector; + + -- Create a table for storing documents with embeddings + CREATE TABLE documents ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + embedding vector(768), -- Adjust dimension based on your embedding model + metadata JSONB, + created_at TIMESTAMP DEFAULT NOW() + ); + + -- Create an index for efficient vector similarity search + CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); + ``` + + Configure the database connection: + + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import postgres from 'postgres'; + import { toSql } from 'pgvector'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + const sql = postgres({ + host: 'localhost', + port: 5432, + database: 'your_database', + username: 'your_username', + password: 'your_password', + ssl: false, // Enable for production + }); + ``` + + + Install the required dependencies: + + ```bash + go get github.com/lib/pq + go get github.com/pgvector/pgvector-go + ``` + + Set up your PostgreSQL database with pgvector: + + ```sql + -- Enable the pgvector extension + CREATE EXTENSION IF NOT EXISTS vector; + + -- Create a table for storing documents with embeddings + CREATE TABLE documents ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + embedding vector(768), -- Adjust dimension based on your embedding model + metadata JSONB, + created_at TIMESTAMP DEFAULT NOW() + ); + + -- Create an index for efficient vector similarity search + CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); + ``` + + Configure the database connection: + + ```go + package main + + import ( + "database/sql" + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googleai" + "github.com/lib/pq" + "github.com/pgvector/pgvector-go" + ) + + func main() { + ctx := context.Background() + + // Initialize Genkit + g, err 
:= genkit.Init(ctx, + genkit.WithPlugins(&googleai.GoogleAI{}), + ) + if err != nil { + log.Fatal(err) + } + + // Connect to PostgreSQL + db, err := sql.Open("postgres", "postgres://username:password@localhost/dbname?sslmode=disable") + if err != nil { + log.Fatal(err) + } + defer db.Close() + } + ``` + + + Install the required dependencies: + + ```bash + pip install psycopg2-binary pgvector + ``` + + Set up your PostgreSQL database with pgvector: + + ```sql + -- Enable the pgvector extension + CREATE EXTENSION IF NOT EXISTS vector; + + -- Create a table for storing documents with embeddings + CREATE TABLE documents ( + id SERIAL PRIMARY KEY, + content TEXT NOT NULL, + embedding vector(768), -- Adjust dimension based on your embedding model + metadata JSONB, + created_at TIMESTAMP DEFAULT NOW() + ); + + -- Create an index for efficient vector similarity search + CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); + ``` + + Configure the database connection: + + ```python + import psycopg2 + from pgvector.psycopg2 import register_vector + from genkit.ai import Genkit + from genkit.plugins.google_genai import GoogleGenAI + + # Initialize Genkit + ai = Genkit( + plugins=[GoogleGenAI()], + ) + + # Connect to PostgreSQL + conn = psycopg2.connect( + host="localhost", + database="your_database", + user="your_username", + password="your_password" + ) + + # Register pgvector types + register_vector(conn) + ``` + + + +## Basic Usage + +### Document Indexing + + + + Create a custom indexer for pgvector: + + ```ts + import { Document } from 'genkit'; + + const pgvectorIndexer = ai.defineIndexer( + { + name: 'pgvector-indexer', + configSchema: z.object({ + tableName: z.string().optional().default('documents'), + }), + }, + async (docs: Document[], options) => { + const tableName = options.tableName || 'documents'; + + for (const doc of docs) { + // Generate embedding for the document + const embedding = await ai.embed({ + embedder: googleAI.embedder('gemini-embedding-001'), + content: doc.content, + }); + + // Insert document with embedding into PostgreSQL + await sql` + INSERT INTO ${sql(tableName)} (content, embedding, metadata) + VALUES ( + ${doc.content}, + ${toSql(embedding[0].embedding)}, + ${JSON.stringify(doc.metadata || {})} + ) + `; + } + }, + ); + + // Usage + const documents: Document[] = [ + { + content: 'PostgreSQL is a powerful relational database with vector capabilities.', + metadata: { + title: 'PostgreSQL Overview', + category: 'database', + source: 'documentation', + }, + }, + { + content: 'pgvector extends PostgreSQL with efficient vector similarity search.', + metadata: { + title: 'pgvector Extension', + category: 'technology', + source: 'blog', + }, + }, + ]; + + await ai.index({ + indexer: pgvectorIndexer, + documents, + }); + ``` + + + Create a custom indexer for pgvector: + + ```go + import ( + "context" + "database/sql" + "encoding/json" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/pgvector/pgvector-go" + ) + + func createPgvectorIndexer(db *sql.DB) ai.Indexer { + return genkit.DefineIndexer(g, "pgvector-indexer", + func(ctx context.Context, docs []ai.Document) error { + for _, doc := range docs { + // Generate embedding + embedding, err := genkit.Embed(ctx, g, + ai.WithEmbedder("googleai/gemini-embedding-001"), + ai.WithContent(doc.Content), + ) + if err != nil { + return fmt.Errorf("failed to generate embedding: %w", err) + } + + // Convert metadata to JSON + metadataJSON, err := 
json.Marshal(doc.Metadata) + if err != nil { + return fmt.Errorf("failed to marshal metadata: %w", err) + } + + // Insert into PostgreSQL + _, err = db.ExecContext(ctx, ` + INSERT INTO documents (content, embedding, metadata) + VALUES ($1, $2, $3) + `, doc.Content, pgvector.NewVector(embedding[0].Embedding), metadataJSON) + + if err != nil { + return fmt.Errorf("failed to insert document: %w", err) + } + } + return nil + }, + ) + } + + // Usage + func indexDocuments(ctx context.Context, db *sql.DB) error { + indexer := createPgvectorIndexer(db) + + documents := []ai.Document{ + { + Content: "PostgreSQL is a powerful relational database with vector capabilities.", + Metadata: map[string]interface{}{ + "title": "PostgreSQL Overview", + "category": "database", + "source": "documentation", + }, + }, + { + Content: "pgvector extends PostgreSQL with efficient vector similarity search.", + Metadata: map[string]interface{}{ + "title": "pgvector Extension", + "category": "technology", + "source": "blog", + }, + }, + } + + return genkit.Index(ctx, g, + ai.WithIndexer(indexer), + ai.WithDocuments(documents), + ) + } + ``` + + + Create a custom indexer for pgvector: + + ```python + import json + from typing import List, Dict, Any + from pgvector.psycopg2 import register_vector + + async def create_pgvector_indexer(conn): + """Create a custom pgvector indexer""" + + async def index_documents(docs: List[Dict[str, Any]], table_name: str = "documents"): + cursor = conn.cursor() + + try: + for doc in docs: + # Generate embedding + embedding_response = await ai.embed( + embedder="googleai/gemini-embedding-001", + content=doc["content"] + ) + embedding = embedding_response[0]["embedding"] + + # Insert document with embedding + cursor.execute(""" + INSERT INTO %s (content, embedding, metadata) + VALUES (%%s, %%s, %%s) + """ % table_name, ( + doc["content"], + embedding, + json.dumps(doc.get("metadata", {})) + )) + + conn.commit() + return {"indexed": len(docs), "success": True} + + except Exception as error: + conn.rollback() + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + finally: + cursor.close() + + return index_documents + + # Usage + async def index_documents_example(): + indexer = await create_pgvector_indexer(conn) + + documents = [ + { + "content": "PostgreSQL is a powerful relational database with vector capabilities.", + "metadata": { + "title": "PostgreSQL Overview", + "category": "database", + "source": "documentation", + }, + }, + { + "content": "pgvector extends PostgreSQL with efficient vector similarity search.", + "metadata": { + "title": "pgvector Extension", + "category": "technology", + "source": "blog", + }, + }, + ] + + result = await indexer(documents) + return result + ``` + + + +### Document Retrieval + + + + Create a custom retriever for pgvector: + + ```ts + const pgvectorRetriever = ai.defineRetriever( + { + name: 'pgvector-retriever', + configSchema: z.object({ + tableName: z.string().optional().default('documents'), + k: z.number().optional().default(5), + threshold: z.number().optional(), + where: z.record(z.any()).optional(), + }), + }, + async (query: string, options) => { + const { tableName = 'documents', k = 5, threshold, where } = options; + + // Generate embedding for the query + const embedding = await ai.embed({ + embedder: googleAI.embedder('gemini-embedding-001'), + content: query, + }); + + // Build WHERE clause for metadata filtering + let whereClause = ''; + let whereParams: any[] = []; + if (where) { + const conditions = 
Object.entries(where).map(([key, value], index) => { + whereParams.push(value); + return `metadata->>'${key}' = $${whereParams.length + 2}`; + }); + whereClause = conditions.length > 0 ? `AND ${conditions.join(' AND ')}` : ''; + } + + // Query similar documents + const queryText = ` + SELECT content, metadata, 1 - (embedding <=> $1) as similarity + FROM ${tableName} + WHERE 1=1 ${whereClause} + ${threshold ? `AND 1 - (embedding <=> $1) >= $${whereParams.length + 2}` : ''} + ORDER BY embedding <=> $1 + LIMIT $2 + `; + + const params = [toSql(embedding[0].embedding), k, ...whereParams]; + if (threshold) params.push(threshold); + + const results = await sql.unsafe(queryText, params); + + return { + documents: results.map((row: any) => ({ + content: row.content, + metadata: { + ...row.metadata, + similarity: row.similarity, + }, + })), + }; + }, + ); + + // Usage + const docs = await ai.retrieve({ + retriever: pgvectorRetriever, + query: "What is PostgreSQL?", + options: { + k: 3, + threshold: 0.7, + where: { category: 'database' }, + }, + }); + ``` + + + Create a custom retriever for pgvector: + + ```go + func createPgvectorRetriever(db *sql.DB) ai.Retriever { + return genkit.DefineRetriever(g, "pgvector-retriever", + func(ctx context.Context, query string, options map[string]interface{}) ([]ai.Document, error) { + // Generate embedding for query + embedding, err := genkit.Embed(ctx, g, + ai.WithEmbedder("googleai/gemini-embedding-001"), + ai.WithContent(query), + ) + if err != nil { + return nil, fmt.Errorf("failed to generate query embedding: %w", err) + } + + // Extract options + k := 5 + if kVal, ok := options["k"].(int); ok { + k = kVal + } + + tableName := "documents" + if tableVal, ok := options["tableName"].(string); ok { + tableName = tableVal + } + + // Query similar documents + queryText := fmt.Sprintf(` + SELECT content, metadata, 1 - (embedding <=> $1) as similarity + FROM %s + ORDER BY embedding <=> $1 + LIMIT $2 + `, tableName) + + rows, err := db.QueryContext(ctx, queryText, + pgvector.NewVector(embedding[0].Embedding), k) + if err != nil { + return nil, fmt.Errorf("failed to query documents: %w", err) + } + defer rows.Close() + + var documents []ai.Document + for rows.Next() { + var content string + var metadataJSON []byte + var similarity float64 + + err := rows.Scan(&content, &metadataJSON, &similarity) + if err != nil { + return nil, fmt.Errorf("failed to scan row: %w", err) + } + + var metadata map[string]interface{} + if err := json.Unmarshal(metadataJSON, &metadata); err != nil { + metadata = make(map[string]interface{}) + } + metadata["similarity"] = similarity + + documents = append(documents, ai.Document{ + Content: content, + Metadata: metadata, + }) + } + + return documents, nil + }, + ) + } + + // Usage + func retrieveDocuments(ctx context.Context, db *sql.DB, query string) ([]ai.Document, error) { + retriever := createPgvectorRetriever(db) + + return genkit.Retrieve(ctx, g, + ai.WithRetriever(retriever), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": 3, + "tableName": "documents", + }), + ) + } + ``` + + + Create a custom retriever for pgvector: + + ```python + async def create_pgvector_retriever(conn): + """Create a custom pgvector retriever""" + + async def retrieve_documents( + query: str, + table_name: str = "documents", + k: int = 5, + threshold: float = None, + where: Dict[str, Any] = None + ) -> List[Dict[str, Any]]: + cursor = conn.cursor() + + try: + # Generate embedding for query + embedding_response = await ai.embed( + 
embedder="googleai/gemini-embedding-001", + content=query + ) + embedding = embedding_response[0]["embedding"] + + # Build WHERE clause for metadata filtering + where_clause = "" + params = [embedding, k] + + if where: + conditions = [] + for key, value in where.items(): + conditions.append(f"metadata->>%s = %s") + params.extend([key, value]) + if conditions: + where_clause = f"AND {' AND '.join(conditions)}" + + if threshold: + where_clause += f" AND 1 - (embedding <=> %s) >= %s" + params.extend([embedding, threshold]) + + # Query similar documents + query_text = f""" + SELECT content, metadata, 1 - (embedding <=> %s) as similarity + FROM {table_name} + WHERE 1=1 {where_clause} + ORDER BY embedding <=> %s + LIMIT %s + """ + + cursor.execute(query_text, params) + results = cursor.fetchall() + + documents = [] + for row in results: + content, metadata, similarity = row + if isinstance(metadata, str): + metadata = json.loads(metadata) + metadata["similarity"] = similarity + + documents.append({ + "content": content, + "metadata": metadata, + }) + + return documents + + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + finally: + cursor.close() + + return retrieve_documents + + # Usage + async def retrieve_documents_example(): + retriever = await create_pgvector_retriever(conn) + + docs = await retriever( + query="What is PostgreSQL?", + k=3, + threshold=0.7, + where={"category": "database"} + ) + + return docs + ``` + + + +## Advanced Features + +### Hybrid Search (Vector + Text) + + + + Combine vector similarity with traditional text search: + + ```ts + const hybridRetriever = ai.defineRetriever( + { + name: 'pgvector-hybrid-retriever', + configSchema: z.object({ + tableName: z.string().optional().default('documents'), + k: z.number().optional().default(5), + vectorWeight: z.number().optional().default(0.7), + textWeight: z.number().optional().default(0.3), + }), + }, + async (query: string, options) => { + const { tableName = 'documents', k = 5, vectorWeight = 0.7, textWeight = 0.3 } = options; + + // Generate embedding for vector search + const embedding = await ai.embed({ + embedder: googleAI.embedder('gemini-embedding-001'), + content: query, + }); + + // Hybrid search combining vector similarity and text search + const results = await sql` + WITH vector_search AS ( + SELECT + content, + metadata, + 1 - (embedding <=> ${toSql(embedding[0].embedding)}) as vector_score + FROM ${sql(tableName)} + ), + text_search AS ( + SELECT + content, + metadata, + ts_rank(to_tsvector('english', content), plainto_tsquery('english', ${query})) as text_score + FROM ${sql(tableName)} + WHERE to_tsvector('english', content) @@ plainto_tsquery('english', ${query}) + ) + SELECT + v.content, + v.metadata, + (${vectorWeight} * COALESCE(v.vector_score, 0) + ${textWeight} * COALESCE(t.text_score, 0)) as combined_score + FROM vector_search v + FULL OUTER JOIN text_search t ON v.content = t.content + ORDER BY combined_score DESC + LIMIT ${k} + `; + + return { + documents: results.map((row: any) => ({ + content: row.content, + metadata: { + ...row.metadata, + combined_score: row.combined_score, + }, + })), + }; + }, + ); + ``` + + + Combine vector similarity with traditional text search: + + ```go + func createHybridRetriever(db *sql.DB) ai.Retriever { + return genkit.DefineRetriever(g, "pgvector-hybrid-retriever", + func(ctx context.Context, query string, options map[string]interface{}) ([]ai.Document, error) { + // Generate embedding + embedding, err := genkit.Embed(ctx, g, + 
ai.WithEmbedder("googleai/gemini-embedding-001"), + ai.WithContent(query), + ) + if err != nil { + return nil, fmt.Errorf("failed to generate embedding: %w", err) + } + + // Extract options + k := 5 + vectorWeight := 0.7 + textWeight := 0.3 + tableName := "documents" + + if kVal, ok := options["k"].(int); ok { + k = kVal + } + if vwVal, ok := options["vectorWeight"].(float64); ok { + vectorWeight = vwVal + } + if twVal, ok := options["textWeight"].(float64); ok { + textWeight = twVal + } + if tnVal, ok := options["tableName"].(string); ok { + tableName = tnVal + } + + // Hybrid search query + queryText := fmt.Sprintf(` + WITH vector_search AS ( + SELECT + content, + metadata, + 1 - (embedding <=> $1) as vector_score + FROM %s + ), + text_search AS ( + SELECT + content, + metadata, + ts_rank(to_tsvector('english', content), plainto_tsquery('english', $2)) as text_score + FROM %s + WHERE to_tsvector('english', content) @@ plainto_tsquery('english', $2) + ) + SELECT + v.content, + v.metadata, + ($3 * COALESCE(v.vector_score, 0) + $4 * COALESCE(t.text_score, 0)) as combined_score + FROM vector_search v + FULL OUTER JOIN text_search t ON v.content = t.content + ORDER BY combined_score DESC + LIMIT $5 + `, tableName, tableName) + + rows, err := db.QueryContext(ctx, queryText, + pgvector.NewVector(embedding[0].Embedding), + query, + vectorWeight, + textWeight, + k, + ) + if err != nil { + return nil, fmt.Errorf("hybrid search failed: %w", err) + } + defer rows.Close() + + var documents []ai.Document + for rows.Next() { + var content string + var metadataJSON []byte + var combinedScore float64 + + err := rows.Scan(&content, &metadataJSON, &combinedScore) + if err != nil { + return nil, fmt.Errorf("failed to scan row: %w", err) + } + + var metadata map[string]interface{} + if err := json.Unmarshal(metadataJSON, &metadata); err != nil { + metadata = make(map[string]interface{}) + } + metadata["combined_score"] = combinedScore + + documents = append(documents, ai.Document{ + Content: content, + Metadata: metadata, + }) + } + + return documents, nil + }, + ) + } + ``` + + + Combine vector similarity with traditional text search: + + ```python + async def create_hybrid_retriever(conn): + """Create a hybrid retriever combining vector and text search""" + + async def hybrid_search( + query: str, + table_name: str = "documents", + k: int = 5, + vector_weight: float = 0.7, + text_weight: float = 0.3 + ) -> List[Dict[str, Any]]: + cursor = conn.cursor() + + try: + # Generate embedding + embedding_response = await ai.embed( + embedder="googleai/gemini-embedding-001", + content=query + ) + embedding = embedding_response[0]["embedding"] + + # Hybrid search query + query_text = f""" + WITH vector_search AS ( + SELECT + content, + metadata, + 1 - (embedding <=> %s) as vector_score + FROM {table_name} + ), + text_search AS ( + SELECT + content, + metadata, + ts_rank(to_tsvector('english', content), plainto_tsquery('english', %s)) as text_score + FROM {table_name} + WHERE to_tsvector('english', content) @@ plainto_tsquery('english', %s) + ) + SELECT + v.content, + v.metadata, + (%s * COALESCE(v.vector_score, 0) + %s * COALESCE(t.text_score, 0)) as combined_score + FROM vector_search v + FULL OUTER JOIN text_search t ON v.content = t.content + ORDER BY combined_score DESC + LIMIT %s + """ + + cursor.execute(query_text, [ + embedding, query, query, vector_weight, text_weight, k + ]) + results = cursor.fetchall() + + documents = [] + for row in results: + content, metadata, combined_score = row + if 
isinstance(metadata, str): + metadata = json.loads(metadata) + metadata["combined_score"] = combined_score + + documents.append({ + "content": content, + "metadata": metadata, + }) + + return documents + + except Exception as error: + print(f"Hybrid search failed: {error}") + return [] + finally: + cursor.close() + + return hybrid_search + ``` + + + +## Best Practices + +### Database Optimization + +1. **Choose the right index type**: + - `ivfflat`: Good for most use cases, faster builds + - `hnsw`: Better recall, slower builds + +2. **Optimize index parameters**: + ```sql + -- For ivfflat + CREATE INDEX ON documents USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100); + + -- For hnsw (PostgreSQL 14+) + CREATE INDEX ON documents USING hnsw (embedding vector_cosine_ops) WITH (m = 16, ef_construction = 64); + ``` + +3. **Use appropriate vector dimensions**: Match your embedding model's output dimensions + +4. **Consider partitioning**: For large datasets, partition tables by metadata + +### Performance Optimization + +1. **Batch operations**: Insert/update documents in batches +2. **Connection pooling**: Use connection pools for production applications +3. **Vacuum regularly**: Keep statistics updated for optimal query planning +4. **Monitor query performance**: Use `EXPLAIN ANALYZE` to optimize queries + +### Production Deployment + +1. **Use managed PostgreSQL**: Consider cloud providers with pgvector support +2. **Set up replication**: Configure read replicas for scaling reads +3. **Backup strategies**: Regular backups including vector data +4. **Monitoring**: Track query performance and index usage + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with vector search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/content/docs/unified-docs/vector-databases/pinecone.mdx b/src/content/docs/unified-docs/vector-databases/pinecone.mdx new file mode 100644 index 00000000..49b1364d --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/pinecone.mdx @@ -0,0 +1,612 @@ +--- +title: Pinecone Vector Database +description: Learn how to use Pinecone cloud vector database with Genkit across JavaScript, Go, and Python for RAG applications, semantic search, and vector storage. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Pinecone is a fully managed cloud vector database that provides high-performance vector search capabilities. The Pinecone integration with Genkit enables you to build powerful RAG (Retrieval-Augmented Generation) applications with semantic search, document indexing, and intelligent retrieval. 
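+
+Genkit's Pinecone integrations generally expect the index to already exist, so create one whose dimension matches your embedding model before wiring up the plugin. The sketch below uses the official Pinecone Node.js client; the package name, `createIndex` options, and the 768 dimension are assumptions to adapt to your setup:
+
+```ts
+import { Pinecone } from '@pinecone-database/pinecone';
+
+async function ensureIndex() {
+  const pc = new Pinecone({ apiKey: process.env.PINECONE_API_KEY! });
+
+  // Dimension must match your embedder's output; cosine is a common metric for text.
+  await pc.createIndex({
+    name: 'my-knowledge-base',
+    dimension: 768,
+    metric: 'cosine',
+    spec: { serverless: { cloud: 'aws', region: 'us-east-1' } },
+  });
+}
+```
+
+You can also create the index from the [Pinecone Console](https://app.pinecone.io/) instead of doing it in code.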
+ +## Installation and Setup + + + + Install the Pinecone plugin: + + ```bash + npm install genkitx-pinecone + ``` + + Configure the plugin when initializing Genkit: + + ```ts + import { genkit } from 'genkit'; + import { pinecone } from 'genkitx-pinecone'; + import { googleAI } from '@genkit-ai/googleai'; + + const ai = genkit({ + plugins: [ + pinecone([ + { + indexId: 'my-knowledge-base', + embedder: googleAI.embedder('gemini-embedding-001'), + }, + ]), + ], + }); + ``` + + ### API Key Configuration + + Set your Pinecone API key using one of these methods: + + ```bash + # Environment variable (recommended) + export PINECONE_API_KEY=your_pinecone_api_key + ``` + + ```ts + // Or pass directly to plugin configuration + pinecone([ + { + indexId: 'my-knowledge-base', + embedder: googleAI.embedder('gemini-embedding-001'), + clientParams: { + apiKey: 'your_pinecone_api_key', + }, + }, + ]) + ``` + + Get your API key from [Pinecone Console](https://app.pinecone.io/). + + + For Go applications, use the Pinecone Go client: + + ```bash + go get github.com/pinecone-io/go-pinecone/pinecone + ``` + + ```go + package main + + import ( + "context" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/pinecone" + "github.com/firebase/genkit/go/plugins/googleai" + ) + + func main() { + ctx := context.Background() + g, err := genkit.Init(ctx, + genkit.WithPlugins( + &googleai.GoogleAI{}, + &pinecone.Pinecone{ + APIKey: os.Getenv("PINECONE_API_KEY"), + Indexes: []pinecone.IndexConfig{ + { + IndexID: "my-knowledge-base", + Embedder: "googleai/gemini-embedding-001", + }, + }, + }, + ), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + ### Environment Configuration + + ```bash + export PINECONE_API_KEY=your_pinecone_api_key + ``` + + + For Python applications, install the Pinecone client: + + ```bash + pip install pinecone-client genkit-plugin-pinecone + ``` + + ```python + from genkit.ai import Genkit + from genkit.plugins.pinecone import Pinecone + from genkit.plugins.google_genai import GoogleGenAI + + ai = Genkit( + plugins=[ + GoogleGenAI(), + Pinecone( + api_key=os.getenv("PINECONE_API_KEY"), + indexes=[ + { + "index_id": "my-knowledge-base", + "embedder": "googleai/gemini-embedding-001", + } + ], + ), + ], + ) + ``` + + ### Environment Configuration + + ```bash + export PINECONE_API_KEY=your_pinecone_api_key + ``` + + + +## Basic Usage + +### Document Indexing + + + + Index documents for semantic search: + + ```ts + import { pineconeIndexerRef } from 'genkitx-pinecone'; + import { Document } from 'genkit'; + + // Prepare documents for indexing + const documents: Document[] = [ + { + content: 'Artificial Intelligence is transforming how we work and live.', + metadata: { + title: 'AI Overview', + category: 'technology', + source: 'blog', + }, + }, + { + content: 'Machine learning algorithms can identify patterns in large datasets.', + metadata: { + title: 'ML Patterns', + category: 'data-science', + source: 'research', + }, + }, + ]; + + // Index documents using the default configured index + await ai.index({ + indexer: pineconeIndexerRef, + documents, + }); + + // Or specify a specific index + const knowledgeBaseIndexer = pineconeIndexerRef({ + indexId: 'my-knowledge-base', + }); + + await ai.index({ + indexer: knowledgeBaseIndexer, + documents, + }); + ``` + + + Index documents for semantic search: + + ```go + import ( + "context" + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + func indexDocuments(ctx context.Context) error { + 
documents := []ai.Document{ + { + Content: "Artificial Intelligence is transforming how we work and live.", + Metadata: map[string]interface{}{ + "title": "AI Overview", + "category": "technology", + "source": "blog", + }, + }, + { + Content: "Machine learning algorithms can identify patterns in large datasets.", + Metadata: map[string]interface{}{ + "title": "ML Patterns", + "category": "data-science", + "source": "research", + }, + }, + } + + // Index documents + err := genkit.Index(ctx, g, + ai.WithIndexer("pinecone/my-knowledge-base"), + ai.WithDocuments(documents), + ) + if err != nil { + return fmt.Errorf("failed to index documents: %w", err) + } + + return nil + } + ``` + + + Index documents for semantic search: + + ```python + from typing import List, Dict, Any + + # Prepare documents for indexing + documents = [ + { + "content": "Artificial Intelligence is transforming how we work and live.", + "metadata": { + "title": "AI Overview", + "category": "technology", + "source": "blog", + }, + }, + { + "content": "Machine learning algorithms can identify patterns in large datasets.", + "metadata": { + "title": "ML Patterns", + "category": "data-science", + "source": "research", + }, + }, + ] + + # Index documents + async def index_documents(docs: List[Dict[str, Any]], index_id: str = None): + try: + indexer = f"pinecone/{index_id}" if index_id else "pinecone/my-knowledge-base" + + await ai.index( + indexer=indexer, + documents=docs + ) + + return {"indexed": len(docs), "success": True} + except Exception as error: + print(f"Indexing failed: {error}") + return {"indexed": 0, "success": False} + ``` + + + +### Document Retrieval + + + + Retrieve relevant documents using semantic search: + + ```ts + import { pineconeRetrieverRef } from 'genkitx-pinecone'; + + // Basic retrieval + const query = "How does machine learning work?"; + const docs = await ai.retrieve({ + retriever: pineconeRetrieverRef, + query, + }); + + console.log('Retrieved documents:', docs); + + // Advanced retrieval with specific index and options + const knowledgeBaseRetriever = pineconeRetrieverRef({ + indexId: 'my-knowledge-base', + }); + + const advancedDocs = await ai.retrieve({ + retriever: knowledgeBaseRetriever, + query, + options: { + k: 5, // Number of documents to retrieve + filter: { + category: 'technology', // Metadata filtering + }, + }, + }); + ``` + + + Retrieve relevant documents using semantic search: + + ```go + // Basic retrieval + func retrieveDocuments(ctx context.Context, query string) ([]ai.Document, error) { + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever("pinecone/my-knowledge-base"), + ai.WithQuery(query), + ) + if err != nil { + return nil, fmt.Errorf("retrieval failed: %w", err) + } + + return docs, nil + } + + // Advanced retrieval with options + func advancedRetrieve(ctx context.Context, query, indexID string, limit int, filter map[string]interface{}) ([]ai.Document, error) { + retriever := fmt.Sprintf("pinecone/%s", indexID) + + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever(retriever), + ai.WithQuery(query), + ai.WithOptions(map[string]interface{}{ + "k": limit, + "filter": filter, + }), + ) + if err != nil { + return nil, fmt.Errorf("advanced retrieval failed: %w", err) + } + + return docs, nil + } + ``` + + + Retrieve relevant documents using semantic search: + + ```python + from typing import List, Dict, Any, Optional + + # Basic retrieval + async def retrieve_documents(query: str, index_id: str = "my-knowledge-base") -> List[Dict[str, Any]]: + try: + retriever = 
f"pinecone/{index_id}" + docs = await ai.retrieve( + retriever=retriever, + query=query + ) + return docs + except Exception as error: + print(f"Retrieval failed: {error}") + return [] + + # Advanced retrieval with options + async def advanced_retrieve( + query: str, + index_id: str = "my-knowledge-base", + limit: int = 5, + filter_criteria: Optional[Dict[str, Any]] = None + ) -> List[Dict[str, Any]]: + try: + retriever = f"pinecone/{index_id}" + + options = {"k": limit} + if filter_criteria: + options["filter"] = filter_criteria + + docs = await ai.retrieve( + retriever=retriever, + query=query, + options=options + ) + + return docs + except Exception as error: + print(f"Advanced retrieval failed: {error}") + return [] + ``` + + + +## RAG Implementation + + + + Build a complete RAG system with Pinecone: + + ```ts + // RAG flow with Pinecone retrieval + export const ragFlow = ai.defineFlow( + { + name: 'ragFlow', + inputSchema: z.object({ + question: z.string(), + indexId: z.string().optional(), + maxResults: z.number().optional(), + }), + outputSchema: z.object({ + answer: z.string(), + sources: z.array(z.object({ + content: z.string(), + metadata: z.record(z.any()), + score: z.number(), + })), + }), + }, + async ({ question, indexId, maxResults = 3 }) => { + // Retrieve relevant documents + const retriever = indexId + ? pineconeRetrieverRef({ indexId }) + : pineconeRetrieverRef; + + const docs = await ai.retrieve({ + retriever, + query: question, + options: { k: maxResults }, + }); + + // Build context from retrieved documents + const context = docs + .map(doc => `Source: ${doc.metadata?.title || 'Unknown'}\n${doc.content}`) + .join('\n\n'); + + // Generate answer using context + const response = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: `Based on the following context, answer the question: "${question}" + +Context: +${context} + +Answer:`, + config: { + temperature: 0.3, + maxTokens: 500, + }, + }); + + return { + answer: response.text, + sources: docs.map(doc => ({ + content: doc.content, + metadata: doc.metadata || {}, + score: doc.score || 0, + })), + }; + }, + ); + ``` + + + Build a complete RAG system with Pinecone: + + ```go + // RAG implementation + func ragQuery(ctx context.Context, question, indexID string, maxResults int) (string, []ai.Document, error) { + // Retrieve relevant documents + retriever := fmt.Sprintf("pinecone/%s", indexID) + docs, err := genkit.Retrieve(ctx, g, + ai.WithRetriever(retriever), + ai.WithQuery(question), + ai.WithOptions(map[string]interface{}{ + "k": maxResults, + }), + ) + if err != nil { + return "", nil, fmt.Errorf("retrieval failed: %w", err) + } + + // Build context from retrieved documents + var contextParts []string + for _, doc := range docs { + title := "Unknown" + if t, ok := doc.Metadata["title"].(string); ok { + title = t + } + contextParts = append(contextParts, fmt.Sprintf("Source: %s\n%s", title, doc.Content)) + } + context := strings.Join(contextParts, "\n\n") + + // Generate answer using context + prompt := fmt.Sprintf(`Based on the following context, answer the question: "%s" + +Context: +%s + +Answer:`, question, context) + + resp, err := genkit.Generate(ctx, g, + ai.WithModel("googleai/gemini-2.5-flash"), + ai.WithPrompt(prompt), + ai.WithConfig(map[string]interface{}{ + "temperature": 0.3, + "maxTokens": 500, + }), + ) + if err != nil { + return "", nil, fmt.Errorf("generation failed: %w", err) + } + + return resp.Text(), docs, nil + } + ``` + + + Build a complete RAG system with Pinecone: + + 
```python + # RAG implementation + async def rag_query( + question: str, + index_id: str = "my-knowledge-base", + max_results: int = 3 + ) -> Dict[str, Any]: + try: + # Retrieve relevant documents + retriever = f"pinecone/{index_id}" + docs = await ai.retrieve( + retriever=retriever, + query=question, + options={"k": max_results} + ) + + # Build context from retrieved documents + context_parts = [] + for doc in docs: + title = doc.get("metadata", {}).get("title", "Unknown") + context_parts.append(f"Source: {title}\n{doc['content']}") + + context = "\n\n".join(context_parts) + + # Generate answer using context + prompt = f'''Based on the following context, answer the question: "{question}" + +Context: +{context} + +Answer:''' + + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt=prompt, + config={ + "temperature": 0.3, + "max_tokens": 500, + } + ) + + return { + "answer": response.text, + "sources": [ + { + "content": doc["content"], + "metadata": doc.get("metadata", {}), + "score": doc.get("score", 0), + } + for doc in docs + ], + } + except Exception as error: + print(f"RAG query failed: {error}") + return {"answer": "I'm sorry, I couldn't find relevant information.", "sources": []} + ``` + + + +## Best Practices + +### Index Management + +1. **Choose appropriate dimensions**: Match your embedding model's output dimensions +2. **Use meaningful metadata**: Include searchable fields like category, date, source +3. **Optimize for your use case**: Consider pod type and replicas based on query volume +4. **Monitor performance**: Track query latency and accuracy metrics + +### Query Optimization + +1. **Use specific queries**: More specific queries yield better results +2. **Leverage metadata filtering**: Combine semantic search with metadata filters +3. **Tune similarity thresholds**: Adjust based on your quality requirements +4. **Implement query expansion**: Enhance queries with synonyms or context + +### Cost Management + +1. **Right-size your index**: Choose appropriate pod types and replica counts +2. **Use namespaces**: Organize data efficiently within indexes +3. **Monitor usage**: Track query volume and storage costs +4. 
**Implement caching**: Cache frequent queries to reduce API calls + +## Next Steps + +- Learn about [RAG implementation](/unified-docs/rag) to build complete retrieval-augmented generation systems +- Explore [creating flows](/unified-docs/creating-flows) to build structured AI workflows with vector search +- See [deployment guides](/unified-docs/deployment) for production deployment strategies +- Check out other vector database options for different use cases diff --git a/src/sidebar.ts b/src/sidebar.ts index 0019ee34..934396da 100644 --- a/src/sidebar.ts +++ b/src/sidebar.ts @@ -302,12 +302,71 @@ const PYTHON_SIDEBAR = [ ]; const UNIFIED_SIDEBAR = [ - { label: "Generating content", slug: "unified-docs/generating-content" }, - { label: "Creating flows", slug: "unified-docs/creating-flows" }, - { label: "Tool calling", slug: "unified-docs/tool-calling" }, - { label: "Retrieval-augmented generation (RAG)", slug: "unified-docs/rag" }, - { label: "Managing prompts with Dotprompt", slug: "unified-docs/dotprompt" }, - { label: "Evaluation", slug: "unified-docs/evaluation" }, + { label: "Get started", slug: "unified-docs/get-started" }, + { label: "Developer tools", slug: "unified-docs/developer-tools" }, + { label: "MCP Server", slug: "unified-docs/mcp-server" }, + { + label: "Building AI workflows", + items: [ + { label: "Generating content", slug: "unified-docs/generating-content" }, + { label: "Passing information through context", slug: "unified-docs/context" }, + { label: "Creating flows", slug: "unified-docs/creating-flows" }, + { label: "Managing prompts with Dotprompt", slug: "unified-docs/dotprompt" }, + { label: "Creating persistent chat sessions", slug: "unified-docs/chat-sessions" }, + { label: "Tool calling", slug: "unified-docs/tool-calling" }, + { label: "Pause generation using interrupts", slug: "unified-docs/interrupts" }, + { label: "Retrieval-augmented generation (RAG)", slug: "unified-docs/rag" }, + { label: "Building multi-agent systems", slug: "unified-docs/multi-agent-systems" }, + { label: "Evaluation", slug: "unified-docs/evaluation" }, + { label: "Observability and monitoring", slug: "unified-docs/observability-monitoring" }, + { label: "Error handling", slug: "unified-docs/error-handling" }, + ], + }, + { + label: "AI Providers", + items: [ + { label: "Google AI", slug: "unified-docs/plugins/google-ai" }, + { label: "Vertex AI", slug: "unified-docs/plugins/vertex-ai" }, + { label: "OpenAI", slug: "unified-docs/plugins/openai" }, + { label: "Anthropic (Claude)", slug: "unified-docs/plugins/anthropic" }, + { label: "xAI (Grok)", slug: "unified-docs/plugins/xai" }, + { label: "DeepSeek", slug: "unified-docs/plugins/deepseek" }, + { label: "Ollama", slug: "unified-docs/plugins/ollama" }, + { label: "Model Context Protocol (MCP)", slug: "unified-docs/plugins/mcp" }, + ], + }, + { + label: "Vector Databases", + items: [ + { label: "Pinecone", slug: "unified-docs/vector-databases/pinecone" }, + { label: "ChromaDB", slug: "unified-docs/vector-databases/chromadb" }, + { label: "pgvector", slug: "unified-docs/vector-databases/pgvector" }, + { label: "LanceDB", slug: "unified-docs/vector-databases/lancedb" }, + { label: "Astra DB", slug: "unified-docs/vector-databases/astra-db" }, + { label: "Neo4j", slug: "unified-docs/vector-databases/neo4j" }, + { label: "Cloud SQL PostgreSQL", slug: "unified-docs/vector-databases/cloud-sql-postgresql" }, + ], + }, + { + label: "Web Framework Integrations", + items: [ + { label: "Express.js", slug: "unified-docs/frameworks/express" }, + { 
label: "Next.js", slug: "unified-docs/frameworks/nextjs" }, + ], + }, + { + label: "Writing Plugins", + items: [ + { label: "Overview", slug: "unified-docs/plugin-authoring/overview" }, + { label: "Model Plugins", slug: "unified-docs/plugin-authoring/models" }, + ], + }, + { + label: "Advanced Topics", + items: [ + { label: "Deployment Guide", slug: "unified-docs/deployment" }, + ], + }, ]; export const sidebar = [ From 499fd0ef580a05c5f612cfad0ffe663cc00d432c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cchrisraygill=E2=80=9D?= Date: Thu, 7 Aug 2025 21:38:06 -0400 Subject: [PATCH 4/9] Add comprehensive deployment and vector database documentation - Add deployment guides for any platform, authorization, Cloud Run, and Firebase - Add Model Context Protocol documentation - Add vector database guides for Cloud Firestore and dev local vectorstore - Update tool calling documentation and sidebar navigation - Provide platform-agnostic deployment options with security best practices --- .../unified-docs/deployment/any-platform.mdx | 992 ++++++++++++++++++ .../unified-docs/deployment/authorization.mdx | 252 +++++ .../unified-docs/deployment/cloud-run.mdx | 584 +++++++++++ .../docs/unified-docs/deployment/firebase.mdx | 949 +++++++++++++++++ .../unified-docs/model-context-protocol.mdx | 721 +++++++++++++ .../docs/unified-docs/tool-calling.mdx | 20 +- .../vector-databases/cloud-firestore.mdx | 980 +++++++++++++++++ .../dev-local-vectorstore.mdx | 728 +++++++++++++ src/sidebar.ts | 43 +- 9 files changed, 5241 insertions(+), 28 deletions(-) create mode 100644 src/content/docs/unified-docs/deployment/any-platform.mdx create mode 100644 src/content/docs/unified-docs/deployment/authorization.mdx create mode 100644 src/content/docs/unified-docs/deployment/cloud-run.mdx create mode 100644 src/content/docs/unified-docs/deployment/firebase.mdx create mode 100644 src/content/docs/unified-docs/model-context-protocol.mdx create mode 100644 src/content/docs/unified-docs/vector-databases/cloud-firestore.mdx create mode 100644 src/content/docs/unified-docs/vector-databases/dev-local-vectorstore.mdx diff --git a/src/content/docs/unified-docs/deployment/any-platform.mdx b/src/content/docs/unified-docs/deployment/any-platform.mdx new file mode 100644 index 00000000..68153a0f --- /dev/null +++ b/src/content/docs/unified-docs/deployment/any-platform.mdx @@ -0,0 +1,992 @@ +--- +title: Deploy to Any Platform +description: Learn how to deploy Genkit flows to any Node.js, Go, or Python hosting platform with maximum flexibility and control. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Genkit flows can be deployed to any platform that supports Node.js, Go, or Python applications. This approach gives you maximum flexibility and control over your deployment environment, allowing you to choose the hosting provider that best fits your needs. 
+ +## Key Features + +- **Platform agnostic**: Deploy to any hosting provider +- **Full control**: Complete control over infrastructure and configuration +- **Cost flexibility**: Choose the most cost-effective hosting option +- **Custom environments**: Support for specialized deployment requirements +- **Hybrid deployments**: Mix and match different platforms for different services + +## Prerequisites + +- Hosting platform that supports your chosen language runtime +- Basic understanding of web server deployment +- Familiarity with [Genkit flows](/unified-docs/creating-flows) + +## Popular Hosting Platforms + +### Node.js Platforms +- **Vercel**: Excellent for Next.js and serverless functions +- **Netlify**: Great for JAMstack applications +- **Railway**: Simple deployment with automatic scaling +- **Render**: Easy deployment with managed databases +- **DigitalOcean App Platform**: Managed platform with predictable pricing +- **AWS Lambda**: Serverless with pay-per-request pricing +- **Heroku**: Simple deployment with add-on ecosystem + +### Go Platforms +- **Railway**: Native Go support with simple deployment +- **Render**: Managed Go hosting with auto-scaling +- **DigitalOcean App Platform**: Container-based Go deployment +- **AWS Lambda**: Serverless Go functions +- **Fly.io**: Global edge deployment +- **PlanetScale**: For Go applications with database needs + +### Python Platforms +- **Railway**: Python support with automatic dependency detection +- **Render**: Managed Python hosting +- **PythonAnywhere**: Python-focused hosting platform +- **DigitalOcean App Platform**: Container-based Python deployment +- **AWS Lambda**: Serverless Python functions +- **Heroku**: Traditional Python hosting + +## Implementation + +### Basic Server Setup + + + + Create a production-ready Express server: + + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import { startFlowServer } from '@genkit-ai/express'; + import express from 'express'; + import helmet from 'helmet'; + import rateLimit from 'express-rate-limit'; + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), + }); + + // Define your flows + const chatFlow = ai.defineFlow( + { + name: 'chat', + inputSchema: z.object({ + message: z.string(), + history: z.array(z.object({ + role: z.enum(['user', 'assistant']), + content: z.string() + })).optional() + }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ message, history = [] }) => { + const context = history.map(h => `${h.role}: ${h.content}`).join('\n'); + const prompt = context ? `${context}\nuser: ${message}` : message; + + const { text } = await ai.generate(prompt); + return { response: text }; + } + ); + + // Create Express app with security middleware + const app = express(); + + // Security middleware + app.use(helmet()); + app.use(express.json({ limit: '10mb' })); + + // Rate limiting + const limiter = rateLimit({ + windowMs: 15 * 60 * 1000, // 15 minutes + max: 100, // limit each IP to 100 requests per windowMs + message: 'Too many requests from this IP, please try again later.' 
+ }); + app.use('/api/', limiter); + + // Health check endpoint + app.get('/health', (req, res) => { + res.status(200).json({ status: 'healthy', timestamp: new Date().toISOString() }); + }); + + // Start flow server with custom Express app + startFlowServer({ + flows: [chatFlow], + port: parseInt(process.env.PORT || '3000'), + app, // Use custom Express app + cors: { + origin: process.env.ALLOWED_ORIGINS?.split(',') || ['http://localhost:3000'], + credentials: true, + }, + }); + + console.log(`Server running on port ${process.env.PORT || 3000}`); + ``` + + **Package.json for deployment**: + ```json + { + "name": "genkit-app", + "version": "1.0.0", + "scripts": { + "build": "tsc", + "start": "node dist/index.js", + "dev": "tsx src/index.ts", + "postinstall": "npm run build" + }, + "dependencies": { + "genkit": "^0.5.0", + "@genkit-ai/googleai": "^0.5.0", + "@genkit-ai/express": "^0.5.0", + "express": "^4.18.0", + "helmet": "^7.0.0", + "express-rate-limit": "^6.7.0" + }, + "devDependencies": { + "typescript": "^5.0.0", + "tsx": "^4.0.0", + "@types/express": "^4.17.0" + }, + "engines": { + "node": ">=18.0.0" + } + } + ``` + + + Create a production-ready Go server: + + ```go + package main + + import ( + "context" + "encoding/json" + "fmt" + "log" + "net/http" + "os" + "strconv" + "time" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + "golang.org/x/time/rate" + ) + + type ChatMessage struct { + Role string `json:"role"` + Content string `json:"content"` + } + + type ChatInput struct { + Message string `json:"message"` + History []ChatMessage `json:"history,omitempty"` + } + + type ChatOutput struct { + Response string `json:"response"` + } + + type HealthResponse struct { + Status string `json:"status"` + Timestamp string `json:"timestamp"` + } + + // Rate limiting middleware + func rateLimitMiddleware(limiter *rate.Limiter) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if !limiter.Allow() { + http.Error(w, "Rate limit exceeded", http.StatusTooManyRequests) + return + } + next.ServeHTTP(w, r) + }) + } + } + + // CORS middleware + func corsMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Access-Control-Allow-Origin", "*") + w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization") + + if r.Method == "OPTIONS" { + w.WriteHeader(http.StatusOK) + return + } + + next.ServeHTTP(w, r) + }) + } + + // Security headers middleware + func securityMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("X-Content-Type-Options", "nosniff") + w.Header().Set("X-Frame-Options", "DENY") + w.Header().Set("X-XSS-Protection", "1; mode=block") + next.ServeHTTP(w, r) + }) + } + + func main() { + ctx := context.Background() + + // Initialize Genkit + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatalf("failed to initialize Genkit: %v", err) + } + + // Define chat flow + chatFlow := genkit.DefineFlow(g, "chat", + func(ctx context.Context, input ChatInput) (ChatOutput, error) { + var context string + for _, msg := range input.History { + context += 
fmt.Sprintf("%s: %s\n", msg.Role, msg.Content) + } + + prompt := input.Message + if context != "" { + prompt = context + "user: " + input.Message + } + + resp, err := genkit.Generate(ctx, g, ai.WithPrompt(prompt)) + if err != nil { + return ChatOutput{}, fmt.Errorf("failed to generate response: %w", err) + } + + return ChatOutput{Response: resp.Text()}, nil + }) + + // Create rate limiter (100 requests per minute) + limiter := rate.NewLimiter(rate.Every(time.Minute/100), 100) + + // Set up HTTP routes + mux := http.NewServeMux() + + // Health check endpoint + mux.HandleFunc("GET /health", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(HealthResponse{ + Status: "healthy", + Timestamp: time.Now().Format(time.RFC3339), + }) + }) + + // Chat endpoint with middleware + mux.Handle("POST /chat", + securityMiddleware( + corsMiddleware( + rateLimitMiddleware(limiter)( + genkit.Handler(chatFlow))))) + + port := os.Getenv("PORT") + if port == "" { + port = "8080" + } + + server := &http.Server{ + Addr: ":" + port, + Handler: mux, + ReadTimeout: 30 * time.Second, + WriteTimeout: 30 * time.Second, + IdleTimeout: 120 * time.Second, + } + + log.Printf("Server starting on port %s", port) + log.Fatal(server.ListenAndServe()) + } + ``` + + **go.mod for deployment**: + ```go + module genkit-app + + go 1.21 + + require ( + github.com/firebase/genkit/go v0.5.0 + golang.org/x/time v0.5.0 + ) + ``` + + + Create a production-ready Flask server: + + ```python + import os + import time + from datetime import datetime + from typing import List, Optional + from flask import Flask, request, jsonify + from flask_limiter import Limiter + from flask_limiter.util import get_remote_address + from flask_cors import CORS + + from genkit.ai import Genkit + from genkit.plugins.flask import genkit_flask_handler + from genkit.plugins.google_genai import GoogleGenAI, google_genai_name + from genkit.types import GenkitError + + # Initialize Genkit + ai = Genkit( + plugins=[GoogleGenAI()], + model=google_genai_name('gemini-2.5-flash'), + ) + + # Create Flask app + app = Flask(__name__) + + # Configure CORS + CORS(app, origins=os.environ.get('ALLOWED_ORIGINS', 'http://localhost:3000').split(',')) + + # Configure rate limiting + limiter = Limiter( + app, + key_func=get_remote_address, + default_limits=["100 per hour"] + ) + + # Security headers + @app.after_request + def add_security_headers(response): + response.headers['X-Content-Type-Options'] = 'nosniff' + response.headers['X-Frame-Options'] = 'DENY' + response.headers['X-XSS-Protection'] = '1; mode=block' + return response + + # Data models + class ChatMessage: + def __init__(self, role: str, content: str): + self.role = role + self.content = content + + class ChatInput: + def __init__(self, message: str, history: Optional[List[ChatMessage]] = None): + self.message = message + self.history = history or [] + + # Health check endpoint + @app.route('/health', methods=['GET']) + def health_check(): + return jsonify({ + 'status': 'healthy', + 'timestamp': datetime.now().isoformat() + }) + + # Chat flow with rate limiting + @app.post('/chat') + @limiter.limit("10 per minute") + @genkit_flask_handler(ai) + @ai.flow() + async def chat(data: dict, ctx): + try: + message = data.get('message', '') + history = data.get('history', []) + + if not message: + raise GenkitError(status='INVALID_ARGUMENT', message='Message is required') + + # Build context from history + context = '' + for msg in history: + context += 
f"{msg.get('role', 'user')}: {msg.get('content', '')}\n" + + prompt = message + if context: + prompt = context + f"user: {message}" + + response = await ai.generate( + on_chunk=ctx.send_chunk, + prompt=prompt, + ) + + return {'response': response.text} + + except Exception as e: + raise GenkitError(status='INTERNAL', message=str(e)) + + # Error handlers + @app.errorhandler(429) + def ratelimit_handler(e): + return jsonify({'error': 'Rate limit exceeded', 'message': str(e)}), 429 + + @app.errorhandler(500) + def internal_error(e): + return jsonify({'error': 'Internal server error'}), 500 + + if __name__ == "__main__": + port = int(os.environ.get("PORT", 5000)) + debug = os.environ.get("FLASK_ENV") == "development" + + app.run( + debug=debug, + host="0.0.0.0", + port=port, + threaded=True + ) + ``` + + **requirements.txt for deployment**: + ```txt + genkit>=0.5.0 + genkit-plugin-google-genai>=0.5.0 + genkit-plugin-flask>=0.5.0 + flask>=2.3.0 + flask-limiter>=3.3.0 + flask-cors>=4.0.0 + gunicorn>=21.0.0 + ``` + + **Procfile for platforms like Heroku**: + ``` + web: gunicorn --bind 0.0.0.0:$PORT --workers 2 --threads 4 --timeout 120 app:app + ``` + + + +## Platform-Specific Deployment + +### Vercel (Node.js) + + + + **vercel.json**: + ```json + { + "version": 2, + "builds": [ + { + "src": "src/index.ts", + "use": "@vercel/node" + } + ], + "routes": [ + { + "src": "/(.*)", + "dest": "src/index.ts" + } + ], + "env": { + "GEMINI_API_KEY": "@gemini-api-key" + } + } + ``` + + **Deploy**: + ```bash + # Install Vercel CLI + npm i -g vercel + + # Deploy + vercel --prod + ``` + + + +### Railway + + + + **railway.json**: + ```json + { + "$schema": "https://railway.app/railway.schema.json", + "build": { + "builder": "NIXPACKS" + }, + "deploy": { + "startCommand": "npm start", + "healthcheckPath": "/health", + "healthcheckTimeout": 100, + "restartPolicyType": "ON_FAILURE", + "restartPolicyMaxRetries": 10 + } + } + ``` + + + **railway.json**: + ```json + { + "$schema": "https://railway.app/railway.schema.json", + "build": { + "builder": "NIXPACKS" + }, + "deploy": { + "startCommand": "./main", + "healthcheckPath": "/health", + "healthcheckTimeout": 100, + "restartPolicyType": "ON_FAILURE", + "restartPolicyMaxRetries": 10 + } + } + ``` + + + **railway.json**: + ```json + { + "$schema": "https://railway.app/railway.schema.json", + "build": { + "builder": "NIXPACKS" + }, + "deploy": { + "startCommand": "gunicorn --bind 0.0.0.0:$PORT app:app", + "healthcheckPath": "/health", + "healthcheckTimeout": 100, + "restartPolicyType": "ON_FAILURE", + "restartPolicyMaxRetries": 10 + } + } + ``` + + + +### Render + + + + **render.yaml**: + ```yaml + services: + - type: web + name: genkit-app + env: node + buildCommand: npm install && npm run build + startCommand: npm start + healthCheckPath: /health + envVars: + - key: NODE_ENV + value: production + - key: GEMINI_API_KEY + sync: false + ``` + + + **render.yaml**: + ```yaml + services: + - type: web + name: genkit-go-app + env: go + buildCommand: go build -o main . 
+ startCommand: ./main + healthCheckPath: /health + envVars: + - key: GEMINI_API_KEY + sync: false + ``` + + + **render.yaml**: + ```yaml + services: + - type: web + name: genkit-python-app + env: python + buildCommand: pip install -r requirements.txt + startCommand: gunicorn --bind 0.0.0.0:$PORT app:app + healthCheckPath: /health + envVars: + - key: FLASK_ENV + value: production + - key: GEMINI_API_KEY + sync: false + ``` + + + +### DigitalOcean App Platform + + + + **.do/app.yaml**: + ```yaml + name: genkit-app + services: + - name: web + source_dir: / + github: + repo: your-username/your-repo + branch: main + run_command: npm start + environment_slug: node-js + instance_count: 1 + instance_size_slug: basic-xxs + health_check: + http_path: /health + envs: + - key: NODE_ENV + value: production + - key: GEMINI_API_KEY + value: ${GEMINI_API_KEY} + type: SECRET + ``` + + + **.do/app.yaml**: + ```yaml + name: genkit-go-app + services: + - name: web + source_dir: / + github: + repo: your-username/your-repo + branch: main + run_command: ./main + environment_slug: go + instance_count: 1 + instance_size_slug: basic-xxs + health_check: + http_path: /health + envs: + - key: GEMINI_API_KEY + value: ${GEMINI_API_KEY} + type: SECRET + ``` + + + **.do/app.yaml**: + ```yaml + name: genkit-python-app + services: + - name: web + source_dir: / + github: + repo: your-username/your-repo + branch: main + run_command: gunicorn --bind 0.0.0.0:$PORT app:app + environment_slug: python + instance_count: 1 + instance_size_slug: basic-xxs + health_check: + http_path: /health + envs: + - key: FLASK_ENV + value: production + - key: GEMINI_API_KEY + value: ${GEMINI_API_KEY} + type: SECRET + ``` + + + +## Environment Configuration + +### Environment Variables + + + + Create `.env.example`: + ```bash + # Required + GEMINI_API_KEY=your_api_key_here + PORT=3000 + + # Optional + NODE_ENV=production + ALLOWED_ORIGINS=https://yourdomain.com,https://www.yourdomain.com + RATE_LIMIT_WINDOW_MS=900000 + RATE_LIMIT_MAX_REQUESTS=100 + ``` + + **Environment validation**: + ```ts + import { z } from 'zod'; + + const envSchema = z.object({ + GEMINI_API_KEY: z.string().min(1, 'GEMINI_API_KEY is required'), + PORT: z.string().default('3000'), + NODE_ENV: z.enum(['development', 'production']).default('development'), + ALLOWED_ORIGINS: z.string().optional(), + }); + + export const env = envSchema.parse(process.env); + ``` + + + **Environment validation**: + ```go + package main + + import ( + "log" + "os" + "strconv" + ) + + type Config struct { + GeminiAPIKey string + Port string + AllowedOrigins []string + RateLimit int + } + + func loadConfig() Config { + apiKey := os.Getenv("GEMINI_API_KEY") + if apiKey == "" { + log.Fatal("GEMINI_API_KEY environment variable is required") + } + + port := os.Getenv("PORT") + if port == "" { + port = "8080" + } + + rateLimitStr := os.Getenv("RATE_LIMIT") + rateLimit := 100 // default + if rateLimitStr != "" { + if parsed, err := strconv.Atoi(rateLimitStr); err == nil { + rateLimit = parsed + } + } + + return Config{ + GeminiAPIKey: apiKey, + Port: port, + RateLimit: rateLimit, + } + } + ``` + + + **Environment validation**: + ```python + import os + from typing import List + + class Config: + def __init__(self): + self.gemini_api_key = self._get_required_env('GEMINI_API_KEY') + self.port = int(os.environ.get('PORT', 5000)) + self.flask_env = os.environ.get('FLASK_ENV', 'development') + self.allowed_origins = self._get_list_env('ALLOWED_ORIGINS', ['http://localhost:3000']) + self.rate_limit = 
int(os.environ.get('RATE_LIMIT', 100)) + + def _get_required_env(self, key: str) -> str: + value = os.environ.get(key) + if not value: + raise ValueError(f'{key} environment variable is required') + return value + + def _get_list_env(self, key: str, default: List[str]) -> List[str]: + value = os.environ.get(key) + return value.split(',') if value else default + + config = Config() + ``` + + + +## Monitoring and Logging + +### Application Monitoring + + + + ```ts + import winston from 'winston'; + + // Configure logging + const logger = winston.createLogger({ + level: process.env.LOG_LEVEL || 'info', + format: winston.format.combine( + winston.format.timestamp(), + winston.format.errors({ stack: true }), + winston.format.json() + ), + transports: [ + new winston.transports.Console(), + new winston.transports.File({ filename: 'error.log', level: 'error' }), + new winston.transports.File({ filename: 'combined.log' }) + ] + }); + + // Request logging middleware + app.use((req, res, next) => { + const start = Date.now(); + res.on('finish', () => { + const duration = Date.now() - start; + logger.info('Request completed', { + method: req.method, + url: req.url, + status: res.statusCode, + duration, + userAgent: req.get('User-Agent') + }); + }); + next(); + }); + + // Error handling middleware + app.use((err, req, res, next) => { + logger.error('Unhandled error', { + error: err.message, + stack: err.stack, + url: req.url, + method: req.method + }); + res.status(500).json({ error: 'Internal server error' }); + }); + ``` + + + ```go + import ( + "log/slog" + "net/http" + "time" + ) + + // Logging middleware + func loggingMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + start := time.Now() + + // Wrap ResponseWriter to capture status code + wrapped := &responseWriter{ResponseWriter: w, statusCode: 200} + + next.ServeHTTP(wrapped, r) + + duration := time.Since(start) + + slog.Info("Request completed", + "method", r.Method, + "url", r.URL.Path, + "status", wrapped.statusCode, + "duration", duration, + "user_agent", r.UserAgent(), + ) + }) + } + + type responseWriter struct { + http.ResponseWriter + statusCode int + } + + func (rw *responseWriter) WriteHeader(code int) { + rw.statusCode = code + rw.ResponseWriter.WriteHeader(code) + } + + // Error recovery middleware + func recoveryMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + defer func() { + if err := recover(); err != nil { + slog.Error("Panic recovered", + "error", err, + "url", r.URL.Path, + "method", r.Method, + ) + http.Error(w, "Internal server error", http.StatusInternalServerError) + } + }() + next.ServeHTTP(w, r) + }) + } + ``` + + + ```python + import logging + import time + from flask import g, request + + # Configure logging + logging.basicConfig( + level=logging.INFO, + format='%(asctime)s %(levelname)s %(name)s %(message)s', + handlers=[ + logging.StreamHandler(), + logging.FileHandler('app.log') + ] + ) + + logger = logging.getLogger(__name__) + + # Request logging + @app.before_request + def before_request(): + g.start_time = time.time() + + @app.after_request + def after_request(response): + duration = time.time() - g.start_time + logger.info( + 'Request completed', + extra={ + 'method': request.method, + 'url': request.url, + 'status': response.status_code, + 'duration': duration, + 'user_agent': request.user_agent.string + } + ) + return response + + # Error handling + 
@app.errorhandler(Exception) + def handle_exception(e): + logger.error( + 'Unhandled exception', + extra={ + 'error': str(e), + 'url': request.url, + 'method': request.method + }, + exc_info=True + ) + return jsonify({'error': 'Internal server error'}), 500 + ``` + + + +## Best Practices + +### Security +1. **Use HTTPS only** in production +2. **Implement rate limiting** to prevent abuse +3. **Validate all inputs** before processing +4. **Use security headers** to prevent common attacks +5. **Keep dependencies updated** regularly + +### Performance +1. **Implement caching** for frequently accessed data +2. **Use connection pooling** for database connections +3. **Optimize bundle size** and startup time +4. **Monitor memory usage** and optimize accordingly +5. **Use CDN** for static assets + +### Reliability +1. **Implement health checks** for monitoring +2. **Use graceful shutdown** handling +3. **Implement circuit breakers** for external services +4. **Set up proper logging** and monitoring +5. **Use environment-specific configurations** + +### Cost Optimization +1. **Choose appropriate instance sizes** for your workload +2. **Implement auto-scaling** based on demand +3. **Monitor resource usage** and optimize +4. **Use spot instances** where appropriate +5. **Implement request timeouts** to prevent resource waste + +## Troubleshooting + +### Common Issues + +1. **Environment variable issues**: + - Verify all required environment variables are set diff --git a/src/content/docs/unified-docs/deployment/authorization.mdx b/src/content/docs/unified-docs/deployment/authorization.mdx new file mode 100644 index 00000000..4ef2e990 --- /dev/null +++ b/src/content/docs/unified-docs/deployment/authorization.mdx @@ -0,0 +1,252 @@ +--- +title: Authorization and Security +description: Learn how to implement robust authorization and security patterns for Genkit flows across different deployment platforms and languages. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Securing your Genkit flows is crucial for protecting your AI services from unauthorized access and potential abuse. This guide covers comprehensive authorization patterns and security best practices across all supported languages and deployment platforms. 
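+
+Throughout this page, protected endpoints follow the same shape: the client presents a credential with each request, and the server resolves it into a user context before the flow runs. For orientation, a request against the API key examples below might look like this; the host, path, and request envelope are illustrative and depend on how you serve the flow:
+
+```bash
+curl -X POST https://your-host.example.com/protectedFlow \
+  -H "Content-Type: application/json" \
+  -H "X-API-Key: sk-test-123" \
+  -d '{"data": {"query": "Summarize our refund policy"}}'
+```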
+ +## Key Security Concepts + +- **Authentication**: Verifying the identity of users or services +- **Authorization**: Determining what authenticated users can access +- **API Key Management**: Secure handling of service credentials +- **Rate Limiting**: Preventing abuse and controlling usage +- **Input Validation**: Ensuring data integrity and preventing attacks +- **Audit Logging**: Tracking access and usage patterns + +## Authentication Strategies + +### API Key Authentication + + + + ```ts + import { genkit, z } from 'genkit'; + import { apiKey } from 'genkit/context'; + import { withContextProvider } from '@genkit-ai/express'; + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), + }); + + // Custom API key validation with user context + const customAuthProvider = async (req: any) => { + const authHeader = req.headers['x-api-key']; + if (!authHeader) { + throw new Error('API key required'); + } + + // Validate API key against database or service + const user = await validateApiKey(authHeader); + if (!user) { + throw new Error('Invalid API key'); + } + + return { + auth: { + userId: user.id, + email: user.email, + plan: user.plan, + rateLimit: user.rateLimit, + }, + }; + }; + + async function validateApiKey(apiKey: string) { + // Implement your API key validation logic + const users = { + 'sk-test-123': { id: 'user1', email: 'user@example.com', plan: 'pro', rateLimit: 1000 }, + 'sk-prod-456': { id: 'user2', email: 'admin@example.com', plan: 'enterprise', rateLimit: 10000 }, + }; + return users[apiKey] || null; + } + + const protectedFlow = withContextProvider( + ai.defineFlow( + { + name: 'protectedFlow', + inputSchema: z.object({ query: z.string() }), + outputSchema: z.object({ response: z.string() }), + }, + async ({ query }, { context }) => { + // Access user context + const user = context.auth; + console.log(`Request from user: ${user.email}, plan: ${user.plan}`); + + const { text } = await ai.generate(query); + return { response: text }; + } + ), + customAuthProvider + ); + ``` + + + ```go + package main + + import ( + "context" + "encoding/json" + "fmt" + "net/http" + ) + + type User struct { + ID string `json:"id"` + Email string `json:"email"` + Plan string `json:"plan"` + RateLimit int `json:"rate_limit"` + } + + type AuthContext struct { + User *User + } + + // API key validation middleware + func apiKeyMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + apiKey := r.Header.Get("X-API-Key") + if apiKey == "" { + http.Error(w, "API key required", http.StatusUnauthorized) + return + } + + user, err := validateAPIKey(apiKey) + if err != nil { + http.Error(w, "Invalid API key", http.StatusUnauthorized) + return + } + + // Add user to context + ctx := context.WithValue(r.Context(), "auth", &AuthContext{User: user}) + next.ServeHTTP(w, r.WithContext(ctx)) + }) + } + + func validateAPIKey(apiKey string) (*User, error) { + users := map[string]*User{ + "sk-test-123": { + ID: "user1", Email: "user@example.com", + Plan: "pro", RateLimit: 1000, + }, + "sk-prod-456": { + ID: "user2", Email: "admin@example.com", + Plan: "enterprise", RateLimit: 10000, + }, + } + + if user, exists := users[apiKey]; exists { + return user, nil + } + return nil, fmt.Errorf("invalid API key") + } + + // Protected flow handler + func protectedFlowHandler(flow genkit.Flow) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + // Get auth context + authCtx := 
r.Context().Value("auth").(*AuthContext) + user := authCtx.User + + fmt.Printf("Request from user: %s, plan: %s\n", user.Email, user.Plan) + + // Call the flow + genkit.Handler(flow).ServeHTTP(w, r) + } + } + ``` + + + ```python + import os + from typing import Optional + from flask import Flask, request + from genkit.ai import Genkit + from genkit.plugins.flask import genkit_flask_handler + from genkit.types import GenkitError + + app = Flask(__name__) + ai = Genkit(...) + + class User: + def __init__(self, id: str, email: str, plan: str, rate_limit: int): + self.id = id + self.email = email + self.plan = plan + self.rate_limit = rate_limit + + def validate_api_key(api_key: str) -> Optional[User]: + """Validate API key and return user information""" + users = { + 'sk-test-123': User('user1', 'user@example.com', 'pro', 1000), + 'sk-prod-456': User('user2', 'admin@example.com', 'enterprise', 10000), + } + return users.get(api_key) + + async def api_key_context_provider(request): + """Context provider for API key authentication""" + api_key = request.headers.get('X-API-Key') + if not api_key: + raise GenkitError(status='UNAUTHENTICATED', message='API key required') + + user = validate_api_key(api_key) + if not user: + raise GenkitError(status='UNAUTHENTICATED', message='Invalid API key') + + return { + 'auth': { + 'user_id': user.id, + 'email': user.email, + 'plan': user.plan, + 'rate_limit': user.rate_limit, + } + } + + @app.post('/protected') + @genkit_flask_handler(ai, context_provider=api_key_context_provider) + @ai.flow() + async def protected_flow(query: str, ctx): + # Access user context + auth = ctx.context.get('auth') + print(f"Request from user: {auth['email']}, plan: {auth['plan']}") + + response = await ai.generate( + on_chunk=ctx.send_chunk, + prompt=query, + ) + return {'response': response.text} + ``` + + + +## Best Practices + +### Security Checklist + +1. **Use HTTPS only** in production environments +2. **Implement proper authentication** for all endpoints +3. **Validate and sanitize all inputs** before processing +4. **Use rate limiting** to prevent abuse +5. **Log security events** for monitoring and auditing +6. **Keep dependencies updated** to patch security vulnerabilities +7. **Use environment variables** for sensitive configuration +8. 
**Implement proper error handling** without exposing sensitive information + +### Production Security + +- **API Gateway**: Use an API gateway for centralized security policies +- **WAF (Web Application Firewall)**: Protect against common web attacks +- **DDoS Protection**: Implement protection against distributed denial-of-service attacks +- **Security Headers**: Use security headers like HSTS, CSP, and X-Frame-Options +- **Regular Security Audits**: Conduct regular security assessments and penetration testing + +## Next Steps + +- Learn about [Cloud Run deployment](/unified-docs/deployment/cloud-run) for Google Cloud security features +- Explore [Firebase deployment](/unified-docs/deployment/firebase) for integrated authentication +- See [any platform deployment](/unified-docs/deployment/any-platform) for custom security implementations +- Check out [monitoring and observability](/unified-docs/observability-monitoring) for security monitoring diff --git a/src/content/docs/unified-docs/deployment/cloud-run.mdx b/src/content/docs/unified-docs/deployment/cloud-run.mdx new file mode 100644 index 00000000..a816b220 --- /dev/null +++ b/src/content/docs/unified-docs/deployment/cloud-run.mdx @@ -0,0 +1,584 @@ +--- +title: Deploy with Cloud Run +description: Learn how to deploy Genkit flows to Google Cloud Run across JavaScript, Go, and Python with automatic scaling and containerization. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Google Cloud Run provides a fully managed serverless platform for deploying Genkit flows as containerized applications. Cloud Run automatically scales your applications based on traffic and provides excellent performance with pay-per-use pricing. + +## Key Features + +- **Fully managed**: No infrastructure management required +- **Automatic scaling**: Scales to zero when not in use +- **Container-based**: Deploy any language or runtime +- **Global availability**: Deploy to multiple regions +- **Custom domains**: Support for custom domain mapping +- **Traffic splitting**: Blue-green deployments and gradual rollouts + +## Prerequisites + +- Google Cloud project with billing enabled +- Google Cloud CLI installed and configured +- Docker installed (for custom containers) +- Familiarity with [Genkit flows](/unified-docs/creating-flows) + +## Setup + +### Google Cloud Project Setup + + + + 1. **Set up Google Cloud CLI**: + ```bash + gcloud auth login + gcloud config set project your-project-id + gcloud services enable run.googleapis.com + ``` + + 2. **Create project directory**: + ```bash + mkdir my-genkit-cloudrun-app + cd my-genkit-cloudrun-app + npm init -y + ``` + + 3. **Install dependencies**: + ```bash + npm install genkit @genkit-ai/googleai @genkit-ai/express + npm install --save-dev typescript tsx + ``` + + + 1. **Set up Google Cloud CLI**: + ```bash + gcloud auth login + gcloud config set project your-project-id + gcloud services enable run.googleapis.com + ``` + + 2. **Create Go module**: + ```bash + mkdir my-genkit-cloudrun-app + cd my-genkit-cloudrun-app + go mod init example/cloudrun-app + ``` + + 3. **Install dependencies**: + ```bash + go get github.com/firebase/genkit/go + go get github.com/firebase/genkit/go/plugins/googlegenai + go get github.com/firebase/genkit/go/plugins/server + ``` + + + 1. **Set up Google Cloud CLI**: + ```bash + gcloud auth login + gcloud config set project your-project-id + gcloud services enable run.googleapis.com + ``` + + 2. 
**Create Python project**: + ```bash + mkdir my-genkit-cloudrun-app + cd my-genkit-cloudrun-app + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + ``` + + 3. **Install dependencies**: + ```bash + pip install genkit genkit-plugin-google-genai genkit-plugin-flask flask + ``` + + + +## Implementation + +### Basic Application Setup + + + + Create `src/index.ts`: + + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import { startFlowServer } from '@genkit-ai/express'; + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), + }); + + // Define your flows + const generateJokeFlow = ai.defineFlow( + { + name: 'generateJoke', + inputSchema: z.object({ topic: z.string() }), + outputSchema: z.object({ joke: z.string() }), + }, + async ({ topic }) => { + const { text } = await ai.generate( + `Tell a funny joke about ${topic}. Keep it clean and family-friendly.` + ); + return { joke: text }; + } + ); + + const summarizeFlow = ai.defineFlow( + { + name: 'summarize', + inputSchema: z.object({ text: z.string() }), + outputSchema: z.object({ summary: z.string() }), + }, + async ({ text }) => { + const { text: summary } = await ai.generate( + `Summarize the following text in 2-3 sentences: ${text}` + ); + return { summary }; + } + ); + + // Start the flow server + startFlowServer({ + flows: [generateJokeFlow, summarizeFlow], + port: parseInt(process.env.PORT || '3400'), + cors: { + origin: true, // Allow all origins in development + credentials: true, + }, + }); + + console.log(`Server running on port ${process.env.PORT || 3400}`); + ``` + + Create `package.json` scripts: + + ```json + { + "scripts": { + "build": "tsc", + "start": "node lib/index.js", + "dev": "tsx src/index.ts" + } + } + ``` + + Create `tsconfig.json`: + + ```json + { + "compilerOptions": { + "target": "ES2020", + "module": "commonjs", + "outDir": "./lib", + "rootDir": "./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "lib"] + } + ``` + + + Create `main.go`: + + ```go + package main + + import ( + "context" + "fmt" + "log" + "net/http" + "os" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + "github.com/firebase/genkit/go/plugins/server" + ) + + func main() { + ctx := context.Background() + + // Initialize Genkit + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatalf("failed to initialize Genkit: %v", err) + } + + // Define flows + jokeFlow := genkit.DefineFlow(g, "generateJoke", + func(ctx context.Context, input struct{ Topic string }) (struct{ Joke string }, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Tell a funny joke about %s. 
Keep it clean and family-friendly.", input.Topic), + ) + if err != nil { + return struct{ Joke string }{}, fmt.Errorf("failed to generate joke: %w", err) + } + return struct{ Joke string }{Joke: resp.Text()}, nil + }) + + summarizeFlow := genkit.DefineFlow(g, "summarize", + func(ctx context.Context, input struct{ Text string }) (struct{ Summary string }, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Summarize the following text in 2-3 sentences: %s", input.Text), + ) + if err != nil { + return struct{ Summary string }{}, fmt.Errorf("failed to summarize: %w", err) + } + return struct{ Summary string }{Summary: resp.Text()}, nil + }) + + // Set up HTTP routes + mux := http.NewServeMux() + + // Add CORS middleware + corsHandler := func(h http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Access-Control-Allow-Origin", "*") + w.Header().Set("Access-Control-Allow-Methods", "POST, GET, OPTIONS") + w.Header().Set("Access-Control-Allow-Headers", "Content-Type, Authorization") + + if r.Method == "OPTIONS" { + w.WriteHeader(http.StatusOK) + return + } + + h.ServeHTTP(w, r) + }) + } + + mux.Handle("POST /generateJoke", corsHandler(genkit.Handler(jokeFlow))) + mux.Handle("POST /summarize", corsHandler(genkit.Handler(summarizeFlow))) + + // Health check endpoint + mux.HandleFunc("GET /health", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + w.Write([]byte("OK")) + }) + + port := os.Getenv("PORT") + if port == "" { + port = "8080" + } + + log.Printf("Server starting on port %s", port) + log.Fatal(server.Start(ctx, "0.0.0.0:"+port, mux)) + } + ``` + + + Create `main.py`: + + ```python + import os + from flask import Flask + + from genkit.ai import Genkit + from genkit.plugins.flask import genkit_flask_handler + from genkit.plugins.google_genai import GoogleGenAI, google_genai_name + + # Initialize Genkit + ai = Genkit( + plugins=[GoogleGenAI()], + model=google_genai_name('gemini-2.5-flash'), + ) + + app = Flask(__name__) + + @app.post('/generateJoke') + @genkit_flask_handler(ai) + @ai.flow() + async def generate_joke(topic: str, ctx): + response = await ai.generate( + on_chunk=ctx.send_chunk, + prompt=f'Tell a funny joke about {topic}. Keep it clean and family-friendly.', + ) + return {'joke': response.text} + + @app.post('/summarize') + @genkit_flask_handler(ai) + @ai.flow() + async def summarize(text: str, ctx): + response = await ai.generate( + on_chunk=ctx.send_chunk, + prompt=f'Summarize the following text in 2-3 sentences: {text}', + ) + return {'summary': response.text} + + @app.route('/health') + def health_check(): + return 'OK', 200 + + if __name__ == "__main__": + port = int(os.environ.get("PORT", 8080)) + app.run(debug=False, host="0.0.0.0", port=port) + ``` + + Create `requirements.txt`: + + ```txt + genkit + genkit-plugin-google-genai + genkit-plugin-flask + flask + gunicorn + ``` + + + +## Deployment + +### Direct Source Deployment + + + + ```bash + # Deploy directly from source + gcloud run deploy genkit-app \ + --source . \ + --platform managed \ + --region us-central1 \ + --allow-unauthenticated \ + --set-env-vars GOOGLE_CLOUD_PROJECT=your-project-id \ + --update-secrets GEMINI_API_KEY=gemini-api-key:latest + + # Deploy with custom settings + gcloud run deploy genkit-app \ + --source . 
\ + --platform managed \ + --region us-central1 \ + --memory 1Gi \ + --cpu 1 \ + --timeout 300 \ + --concurrency 100 \ + --min-instances 0 \ + --max-instances 10 \ + --set-env-vars GOOGLE_CLOUD_PROJECT=your-project-id \ + --update-secrets GEMINI_API_KEY=gemini-api-key:latest + ``` + + + ```bash + # Deploy Go application + gcloud run deploy genkit-go-app \ + --source . \ + --platform managed \ + --region us-central1 \ + --allow-unauthenticated \ + --set-env-vars GOOGLE_CLOUD_PROJECT=your-project-id \ + --update-secrets GEMINI_API_KEY=gemini-api-key:latest + + # Deploy with performance optimizations + gcloud run deploy genkit-go-app \ + --source . \ + --platform managed \ + --region us-central1 \ + --memory 512Mi \ + --cpu 1 \ + --timeout 60 \ + --concurrency 1000 \ + --min-instances 1 \ + --max-instances 100 \ + --set-env-vars GOOGLE_CLOUD_PROJECT=your-project-id \ + --update-secrets GEMINI_API_KEY=gemini-api-key:latest + ``` + + + ```bash + # Deploy Python application + gcloud run deploy genkit-python-app \ + --source . \ + --platform managed \ + --region us-central1 \ + --allow-unauthenticated \ + --set-env-vars GOOGLE_CLOUD_PROJECT=your-project-id \ + --update-secrets GEMINI_API_KEY=gemini-api-key:latest + + # Deploy with Gunicorn for production + gcloud run deploy genkit-python-app \ + --source . \ + --platform managed \ + --region us-central1 \ + --memory 1Gi \ + --cpu 2 \ + --timeout 300 \ + --concurrency 80 \ + --set-env-vars GOOGLE_CLOUD_PROJECT=your-project-id \ + --update-secrets GEMINI_API_KEY=gemini-api-key:latest + ``` + + **For production, create a `Procfile`**: + ``` + web: gunicorn --bind :$PORT --workers 1 --threads 8 --timeout 0 main:app + ``` + + + +## Testing Deployed Services + + + + ```bash + # Get service URL + SERVICE_URL=$(gcloud run services describe genkit-app \ + --platform managed \ + --region us-central1 \ + --format 'value(status.url)') + + # Test joke generation + curl -X POST $SERVICE_URL/generateJoke \ + -H "Content-Type: application/json" \ + -d '{"data": {"topic": "programming"}}' + + # Test with authentication + curl -X POST $SERVICE_URL/generateJoke \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $(gcloud auth print-identity-token)" \ + -d '{"data": {"topic": "programming"}}' + ``` + + + ```bash + # Get service URL + SERVICE_URL=$(gcloud run services describe genkit-go-app \ + --platform managed \ + --region us-central1 \ + --format 'value(status.url)') + + # Test joke generation + curl -X POST $SERVICE_URL/generateJoke \ + -H "Content-Type: application/json" \ + -d '{"topic": "programming"}' + + # Test health endpoint + curl $SERVICE_URL/health + ``` + + + ```bash + # Get service URL + SERVICE_URL=$(gcloud run services describe genkit-python-app \ + --platform managed \ + --region us-central1 \ + --format 'value(status.url)') + + # Test joke generation + curl -X POST $SERVICE_URL/generateJoke \ + -H "Content-Type: application/json" \ + -d '{"data": "programming"}' + + # Test streaming response + curl -X POST $SERVICE_URL/generateJoke \ + -H "Content-Type: application/json" \ + -H "Accept: text/event-stream" \ + -d '{"data": "programming"}' + ``` + + + +## Advanced Configuration + +### Traffic Management + +```bash +# Deploy new version without traffic +gcloud run deploy genkit-app \ + --source . 
\ + --no-traffic \ + --tag staging + +# Gradually shift traffic +gcloud run services update-traffic genkit-app \ + --to-tags staging=10 + +# Complete the rollout +gcloud run services update-traffic genkit-app \ + --to-latest +``` + +### Custom Domains + +```bash +# Map custom domain +gcloud run domain-mappings create \ + --service genkit-app \ + --domain api.yourdomain.com \ + --region us-central1 +``` + +### VPC Connector + +```bash +# Create VPC connector +gcloud compute networks vpc-access connectors create genkit-connector \ + --region us-central1 \ + --subnet default \ + --subnet-project your-project-id + +# Deploy with VPC connector +gcloud run deploy genkit-app \ + --source . \ + --vpc-connector genkit-connector \ + --vpc-egress all-traffic +``` + +## Best Practices + +### Security +1. **Never allow unauthenticated access** in production +2. **Use IAM roles** for service-to-service communication +3. **Store secrets securely** using Secret Manager +4. **Implement rate limiting** to prevent abuse +5. **Use HTTPS only** for all communications + +### Performance +1. **Set appropriate resource limits** based on your workload +2. **Use minimum instances** for consistent performance +3. **Implement health checks** for better reliability +4. **Monitor cold start times** and optimize accordingly +5. **Use connection pooling** for external services + +### Cost Management +1. **Set maximum instances** to control costs +2. **Use appropriate CPU and memory** allocations +3. **Monitor usage** with Cloud Monitoring +4. **Implement request timeout** to prevent runaway costs +5. **Use traffic splitting** for gradual rollouts + +## Troubleshooting + +### Common Issues + +1. **Port binding errors**: + - Ensure your app listens on `0.0.0.0:$PORT` + - Cloud Run sets the PORT environment variable + +2. **Memory issues**: + - Increase memory allocation if needed + - Monitor memory usage in Cloud Monitoring + +3. **Timeout errors**: + - Increase timeout settings for long-running operations + - Implement proper error handling + +4. **Secret access issues**: + - Verify service account permissions + - Check secret names and versions + +## Next Steps + +- Learn about [any platform deployment](/unified-docs/deployment/any-platform) for maximum flexibility +- Explore [authorization patterns](/unified-docs/deployment/authorization) for advanced security +- See [client integration](/unified-docs/deployment/client-access) for building frontend applications +- Check out [monitoring and observability](/unified-docs/observability-monitoring) for production insights diff --git a/src/content/docs/unified-docs/deployment/firebase.mdx b/src/content/docs/unified-docs/deployment/firebase.mdx new file mode 100644 index 00000000..2cc351ac --- /dev/null +++ b/src/content/docs/unified-docs/deployment/firebase.mdx @@ -0,0 +1,949 @@ +--- +title: Deploy with Firebase +description: Learn how to deploy Genkit flows as Cloud Functions for Firebase across JavaScript, Go, and Python with built-in authentication and authorization. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Firebase Cloud Functions provides a serverless platform for deploying Genkit flows with built-in authentication, authorization, and scaling. This deployment option is ideal for applications that need Firebase's ecosystem features like Authentication, Firestore, and App Check. 
+ +## Key Features + +- **Serverless**: Automatic scaling with pay-per-use pricing +- **Built-in Authentication**: Firebase Auth integration +- **App Check**: Client integrity verification +- **Firebase Ecosystem**: Seamless integration with other Firebase services +- **Global CDN**: Automatic global distribution + +## Prerequisites + +- Firebase project with Blaze plan (required for Cloud Functions) +- Firebase CLI installed and configured +- Familiarity with [Genkit flows](/unified-docs/creating-flows) + +## Setup + +### Firebase Project Setup + + + + 1. **Create or select a Firebase project**: + ```bash + firebase login + firebase projects:list + ``` + + 2. **Initialize Firebase in your project**: + ```bash + mkdir my-genkit-firebase-app + cd my-genkit-firebase-app + firebase init genkit + ``` + + 3. **Install dependencies**: + ```bash + cd functions + npm install genkit @genkit-ai/googleai firebase-functions + ``` + + + Go doesn't have direct Firebase Functions support, but you can deploy Go applications to Cloud Run and integrate with Firebase services: + + 1. **Set up Firebase project**: + ```bash + firebase login + firebase init + ``` + + 2. **Create Go module**: + ```bash + mkdir my-genkit-go-app + cd my-genkit-go-app + go mod init example/firebase-app + go get github.com/firebase/genkit/go + ``` + + 3. **Install Firebase Admin SDK**: + ```bash + go get firebase.google.com/go/v4 + ``` + + + Python doesn't have direct Firebase Functions support, but you can deploy Python applications to Cloud Run and integrate with Firebase services: + + 1. **Set up Firebase project**: + ```bash + firebase login + firebase init + ``` + + 2. **Create Python project**: + ```bash + mkdir my-genkit-python-app + cd my-genkit-python-app + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + ``` + + 3. 
**Install dependencies**: + ```bash + pip install genkit genkit-plugin-google-genai firebase-admin + ``` + + + +## Implementation + +### Basic Flow Setup + + + + Create your flow in `functions/src/index.ts`: + + ```ts + import { genkit, z } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import { onCallGenkit, hasClaim } from 'firebase-functions/https'; + import { defineSecret } from 'firebase-functions/params'; + + // Define secrets for API keys + const apiKey = defineSecret('GEMINI_API_KEY'); + + const ai = genkit({ + plugins: [googleAI()], + model: googleAI.model('gemini-2.5-flash'), + }); + + // Define your flow + const generatePoemFlow = ai.defineFlow( + { + name: 'generatePoem', + inputSchema: z.object({ subject: z.string() }), + outputSchema: z.object({ poem: z.string() }), + }, + async ({ subject }) => { + const { text } = await ai.generate(`Compose a poem about ${subject}.`); + return { poem: text }; + } + ); + + // Export as Cloud Function + export const generatePoem = onCallGenkit( + { + secrets: [apiKey], + authPolicy: hasClaim('email_verified'), + enforceAppCheck: true, + }, + generatePoemFlow + ); + ``` + + + Create your application with Firebase integration: + + ```go + package main + + import ( + "context" + "log" + "net/http" + "os" + + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/googlegenai" + "github.com/firebase/genkit/go/plugins/server" + "firebase.google.com/go/v4" + "firebase.google.com/go/v4/auth" + ) + + func main() { + ctx := context.Background() + + // Initialize Firebase + app, err := firebase.NewApp(ctx, nil) + if err != nil { + log.Fatalf("error initializing app: %v", err) + } + + authClient, err := app.Auth(ctx) + if err != nil { + log.Fatalf("error getting Auth client: %v", err) + } + + // Initialize Genkit + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{}), + genkit.WithDefaultModel("googleai/gemini-2.5-flash"), + ) + if err != nil { + log.Fatalf("failed to initialize Genkit: %v", err) + } + + // Define flow with Firebase Auth + poemFlow := genkit.DefineFlow(g, "generatePoem", + func(ctx context.Context, input struct{ Subject string }) (struct{ Poem string }, error) { + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Compose a poem about %s", input.Subject), + ) + if err != nil { + return struct{ Poem string }{}, err + } + return struct{ Poem string }{Poem: resp.Text()}, nil + }) + + // Create HTTP handler with Firebase Auth middleware + mux := http.NewServeMux() + mux.HandleFunc("POST /generatePoem", func(w http.ResponseWriter, r *http.Request) { + // Verify Firebase ID token + idToken := r.Header.Get("Authorization") + if idToken == "" { + http.Error(w, "Unauthorized", http.StatusUnauthorized) + return + } + + token, err := authClient.VerifyIDToken(ctx, idToken) + if err != nil { + http.Error(w, "Invalid token", http.StatusUnauthorized) + return + } + + // Check if email is verified + if !token.Claims["email_verified"].(bool) { + http.Error(w, "Email not verified", http.StatusForbidden) + return + } + + // Call the flow + genkit.Handler(poemFlow)(w, r) + }) + + log.Fatal(server.Start(ctx, "0.0.0.0:"+os.Getenv("PORT"), mux)) + } + ``` + + + Create your application with Firebase integration: + + ```python + import os + from flask import Flask, request, jsonify + import firebase_admin + from firebase_admin import auth, credentials + + from genkit.ai import Genkit + from genkit.plugins.flask import genkit_flask_handler + from genkit.plugins.google_genai import GoogleGenAI, 
google_genai_name + + # Initialize Firebase Admin + if not firebase_admin._apps: + cred = credentials.ApplicationDefault() + firebase_admin.initialize_app(cred) + + # Initialize Genkit + ai = Genkit( + plugins=[GoogleGenAI()], + model=google_genai_name('gemini-2.5-flash'), + ) + + app = Flask(__name__) + + def verify_firebase_token(request): + """Middleware to verify Firebase ID token""" + auth_header = request.headers.get('Authorization') + if not auth_header or not auth_header.startswith('Bearer '): + return None + + id_token = auth_header.split('Bearer ')[1] + try: + decoded_token = auth.verify_id_token(id_token) + return decoded_token + except Exception: + return None + + async def firebase_context_provider(request): + """Context provider for Firebase authentication""" + token = verify_firebase_token(request) + if not token: + return {'auth': None} + + return { + 'auth': { + 'uid': token['uid'], + 'email': token.get('email'), + 'email_verified': token.get('email_verified', False) + } + } + + @app.post('/generatePoem') + @genkit_flask_handler(ai, context_provider=firebase_context_provider) + @ai.flow() + async def generate_poem(subject: str, ctx): + # Check authentication + if not ctx.context.get('auth'): + raise Exception('Unauthorized') + + if not ctx.context['auth'].get('email_verified'): + raise Exception('Email not verified') + + response = await ai.generate( + on_chunk=ctx.send_chunk, + prompt=f'Compose a poem about {subject}.', + ) + return {'poem': response.text} + + if __name__ == "__main__": + app.run(debug=True, host="0.0.0.0", port=int(os.environ.get("PORT", 8080))) + ``` + + + +## Authentication and Authorization + +### Firebase Authentication + + + + ```ts + import { onCallGenkit, hasClaim, signedIn } from 'firebase-functions/https'; + + // Require signed-in users + export const protectedFlow = onCallGenkit( + { + authPolicy: signedIn(), + }, + myFlow + ); + + // Require email verification + export const verifiedFlow = onCallGenkit( + { + authPolicy: hasClaim('email_verified'), + }, + myFlow + ); + + // Custom authorization logic + export const adminFlow = onCallGenkit( + { + authPolicy: (auth) => { + return auth?.token?.email_verified && + auth?.token?.admin === true; + }, + }, + myFlow + ); + + // Access auth context in flow + const contextAwareFlow = ai.defineFlow( + { + name: 'contextAware', + inputSchema: z.object({ data: z.string() }), + }, + async (input, { context }) => { + const userId = context.auth?.uid; + const isAdmin = context.auth?.token?.admin; + + // Use auth context in your logic + return { result: `Hello ${userId}` }; + } + ); + ``` + + + ```go + // Middleware for Firebase Auth verification + func firebaseAuthMiddleware(authClient *auth.Client) func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + authHeader := r.Header.Get("Authorization") + if authHeader == "" { + http.Error(w, "Missing Authorization header", http.StatusUnauthorized) + return + } + + // Extract token from "Bearer " + parts := strings.Split(authHeader, " ") + if len(parts) != 2 || parts[0] != "Bearer" { + http.Error(w, "Invalid Authorization header format", http.StatusUnauthorized) + return + } + + idToken := parts[1] + token, err := authClient.VerifyIDToken(r.Context(), idToken) + if err != nil { + http.Error(w, "Invalid token", http.StatusUnauthorized) + return + } + + // Add user info to context + ctx := context.WithValue(r.Context(), "user", token) + next.ServeHTTP(w, 
r.WithContext(ctx)) + }) + } + } + + // Use in your handlers + mux.Handle("POST /protectedFlow", + firebaseAuthMiddleware(authClient)(genkit.Handler(protectedFlow))) + + // Access user context in flow + protectedFlow := genkit.DefineFlow(g, "protected", + func(ctx context.Context, input string) (string, error) { + user := ctx.Value("user").(*auth.Token) + userID := user.UID + + // Use user context in your logic + return fmt.Sprintf("Hello %s", userID), nil + }) + ``` + + + ```python + from genkit.types import GenkitError + + async def firebase_auth_context_provider(request): + """Enhanced context provider with role checking""" + token = verify_firebase_token(request) + if not token: + return {'auth': None} + + # Get custom claims for roles + user_record = auth.get_user(token['uid']) + custom_claims = user_record.custom_claims or {} + + return { + 'auth': { + 'uid': token['uid'], + 'email': token.get('email'), + 'email_verified': token.get('email_verified', False), + 'admin': custom_claims.get('admin', False), + 'roles': custom_claims.get('roles', []) + } + } + + @app.post('/adminFlow') + @genkit_flask_handler(ai, context_provider=firebase_auth_context_provider) + @ai.flow() + async def admin_flow(data: str, ctx): + # Check authentication and authorization + auth = ctx.context.get('auth') + if not auth: + raise GenkitError(status='UNAUTHENTICATED', message='Authentication required') + + if not auth.get('email_verified'): + raise GenkitError(status='PERMISSION_DENIED', message='Email verification required') + + if not auth.get('admin'): + raise GenkitError(status='PERMISSION_DENIED', message='Admin access required') + + # Flow logic here + return await ai.generate(prompt=f'Admin task: {data}') + ``` + + + +### App Check Integration + + + + ```ts + export const secureFlow = onCallGenkit( + { + // Enable App Check enforcement + enforceAppCheck: true, + + // Optional: Consume tokens for replay protection + consumeAppCheckToken: true, + + // Combine with authentication + authPolicy: signedIn(), + }, + myFlow + ); + ``` + + **Client-side App Check setup**: + ```html + + ``` + + + ```go + // App Check verification middleware + func appCheckMiddleware() func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + appCheckToken := r.Header.Get("X-Firebase-AppCheck") + if appCheckToken == "" { + http.Error(w, "Missing App Check token", http.StatusUnauthorized) + return + } + + // Verify App Check token with Firebase Admin SDK + // Implementation depends on your specific setup + + next.ServeHTTP(w, r) + }) + } + } + + // Apply to protected routes + mux.Handle("POST /secureFlow", + appCheckMiddleware()( + firebaseAuthMiddleware(authClient)( + genkit.Handler(secureFlow)))) + ``` + + + ```python + from firebase_admin import app_check + + def verify_app_check_token(request): + """Verify Firebase App Check token""" + app_check_token = request.headers.get('X-Firebase-AppCheck') + if not app_check_token: + return False + + try: + # Verify the App Check token + app_check.verify_token(app_check_token) + return True + except Exception: + return False + + async def secure_context_provider(request): + """Context provider with App Check verification""" + # Verify App Check first + if not verify_app_check_token(request): + raise GenkitError(status='PERMISSION_DENIED', message='App Check verification failed') + + # Then verify Firebase Auth + token = verify_firebase_token(request) + if not token: + raise 
GenkitError(status='UNAUTHENTICATED', message='Authentication required') + + return { + 'auth': { + 'uid': token['uid'], + 'email': token.get('email'), + 'email_verified': token.get('email_verified', False) + }, + 'app_check_verified': True + } + ``` + + + +## Secrets Management + + + + **Set up secrets**: + ```bash + # Store API key in Firebase Functions secrets + firebase functions:secrets:set GEMINI_API_KEY + ``` + + **Use in functions**: + ```ts + import { defineSecret } from 'firebase-functions/params'; + + const geminiApiKey = defineSecret('GEMINI_API_KEY'); + const openaiApiKey = defineSecret('OPENAI_API_KEY'); + + export const multiModelFlow = onCallGenkit( + { + secrets: [geminiApiKey, openaiApiKey], + authPolicy: signedIn(), + }, + myFlow + ); + ``` + + + **Use Google Secret Manager**: + ```go + import ( + secretmanager "cloud.google.com/go/secretmanager/apiv1" + "cloud.google.com/go/secretmanager/apiv1/secretmanagerpb" + ) + + func getSecret(ctx context.Context, projectID, secretID string) (string, error) { + client, err := secretmanager.NewClient(ctx) + if err != nil { + return "", err + } + defer client.Close() + + req := &secretmanagerpb.AccessSecretVersionRequest{ + Name: fmt.Sprintf("projects/%s/secrets/%s/versions/latest", projectID, secretID), + } + + result, err := client.AccessSecretVersion(ctx, req) + if err != nil { + return "", err + } + + return string(result.Payload.Data), nil + } + + // Use in your application + func main() { + ctx := context.Background() + + apiKey, err := getSecret(ctx, "your-project-id", "gemini-api-key") + if err != nil { + log.Fatal(err) + } + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlegenai.GoogleAI{APIKey: apiKey}), + ) + } + ``` + + + **Use Google Secret Manager**: + ```python + from google.cloud import secretmanager + + def get_secret(project_id: str, secret_id: str) -> str: + """Retrieve secret from Google Secret Manager""" + client = secretmanager.SecretManagerServiceClient() + name = f"projects/{project_id}/secrets/{secret_id}/versions/latest" + + response = client.access_secret_version(request={"name": name}) + return response.payload.data.decode("UTF-8") + + # Use in your application + GEMINI_API_KEY = get_secret("your-project-id", "gemini-api-key") + + ai = Genkit( + plugins=[GoogleGenAI(api_key=GEMINI_API_KEY)], + model=google_genai_name('gemini-2.5-flash'), + ) + ``` + + + +## Deployment + + + + **Deploy to Firebase Functions**: + ```bash + # Deploy all functions + firebase deploy --only functions + + # Deploy specific function + firebase deploy --only functions:generatePoem + + # Deploy with environment + firebase use production + firebase deploy --only functions + ``` + + **Test deployed function**: + ```bash + # Get function URL + firebase functions:config:get + + # Test with curl + curl -X POST https://your-region-your-project.cloudfunctions.net/generatePoem \ + -H "Authorization: Bearer $(firebase auth:print-access-token)" \ + -H "Content-Type: application/json" \ + -d '{"data": {"subject": "mountains"}}' + ``` + + + **Deploy to Cloud Run with Firebase integration**: + ```bash + # Build and deploy + gcloud run deploy genkit-firebase-app \ + --source . 
\ + --platform managed \ + --region us-central1 \ + --allow-unauthenticated \ + --set-env-vars GOOGLE_CLOUD_PROJECT=your-project-id + + # Test deployed service + curl -X POST https://genkit-firebase-app-hash-uc.a.run.app/generatePoem \ + -H "Authorization: Bearer $(gcloud auth print-identity-token)" \ + -H "Content-Type: application/json" \ + -d '{"subject": "mountains"}' + ``` + + + **Deploy to Cloud Run with Firebase integration**: + ```bash + # Create requirements.txt + echo "genkit + genkit-plugin-google-genai + genkit-plugin-flask + firebase-admin + flask" > requirements.txt + + # Deploy + gcloud run deploy genkit-firebase-app \ + --source . \ + --platform managed \ + --region us-central1 \ + --allow-unauthenticated \ + --set-env-vars GOOGLE_CLOUD_PROJECT=your-project-id + + # Test deployed service + curl -X POST https://genkit-firebase-app-hash-uc.a.run.app/generatePoem \ + -H "Authorization: Bearer $(gcloud auth print-identity-token)" \ + -H "Content-Type: application/json" \ + -d '{"data": "mountains"}' + ``` + + + +## Client Integration + +### Web Client + + + + ```html + + + + Genkit Firebase App + + + + + + + + + ``` + + + ```javascript + // Client-side JavaScript for Go backend + import { initializeApp } from 'firebase/app'; + import { getAuth, signInWithPopup, GoogleAuthProvider } from 'firebase/auth'; + + const app = initializeApp(firebaseConfig); + const auth = getAuth(app); + + async function callGenkitFlow(subject) { + const user = auth.currentUser; + if (!user) { + throw new Error('User not authenticated'); + } + + const idToken = await user.getIdToken(); + + const response = await fetch('https://your-cloud-run-url/generatePoem', { + method: 'POST', + headers: { + 'Authorization': `Bearer ${idToken}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ subject }), + }); + + if (!response.ok) { + throw new Error('Request failed'); + } + + return response.json(); + } + ``` + + + ```javascript + // Client-side JavaScript for Python backend + import { initializeApp } from 'firebase/app'; + import { getAuth, signInWithPopup, GoogleAuthProvider } from 'firebase/auth'; + + const app = initializeApp(firebaseConfig); + const auth = getAuth(app); + + async function callGenkitFlow(subject) { + const user = auth.currentUser; + if (!user) { + throw new Error('User not authenticated'); + } + + const idToken = await user.getIdToken(); + + const response = await fetch('https://your-cloud-run-url/generatePoem', { + method: 'POST', + headers: { + 'Authorization': `Bearer ${idToken}`, + 'Content-Type': 'application/json', + }, + body: JSON.stringify({ data: subject }), + }); + + if (!response.ok) { + throw new Error('Request failed'); + } + + return response.json(); + } + ``` + + + +## Local Development + + + + **Firebase Emulator Suite**: + ```bash + # Start emulators with Genkit + genkit start -- firebase emulators:start --inspect-functions + + # Or start separately + firebase emulators:start + genkit start -- npm run serve + ``` + + **Environment setup**: + ```bash + # Set up local environment + export GEMINI_API_KEY=your-api-key + export GOOGLE_CLOUD_PROJECT=your-project-id + ``` + + + **Local development**: + ```bash + # Set up environment + export GOOGLE_CLOUD_PROJECT=your-project-id + export GEMINI_API_KEY=your-api-key + gcloud auth application-default login + + # Start with Genkit dev UI + genkit start -- go run . 
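    # The Developer UI is then served at http://localhost:4000 by default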
+ ``` + + + **Local development**: + ```bash + # Set up environment + export GOOGLE_CLOUD_PROJECT=your-project-id + export GEMINI_API_KEY=your-api-key + export GOOGLE_APPLICATION_CREDENTIALS=path/to/service-account.json + + # Start with Genkit dev UI + genkit start -- python main.py + ``` + + + +## Best Practices + +### Security +1. **Always use authentication** for production deployments +2. **Enable App Check** for client integrity verification +3. **Use Firebase Security Rules** for Firestore access control +4. **Store secrets securely** using Firebase Functions secrets or Secret Manager +5. **Validate input data** in your flows + +### Performance +1. **Optimize cold starts** by minimizing dependencies +2. **Use connection pooling** for database connections +3. **Implement caching** for frequently accessed data +4. **Monitor function performance** using Firebase Performance Monitoring + +### Cost Management +1. **Set up billing alerts** to monitor costs +2. **Use appropriate timeout values** for functions +3. **Implement rate limiting** to prevent abuse +4. **Monitor function invocations** and optimize accordingly + +## Troubleshooting + +### Common Issues + +1. **Authentication errors**: + - Verify Firebase project configuration + - Check that users have verified emails + - Ensure proper token handling + +2. **App Check failures**: + - Verify reCAPTCHA configuration + - Check App Check token generation + - Ensure proper client-side setup + +3. **Secret access issues**: + - Verify secret names and permissions + - Check service account roles + - Ensure secrets are properly configured + +## Next Steps + +- Learn about [Cloud Run deployment](/unified-docs/deployment/cloud-run) for more flexible hosting +- Explore [authorization patterns](/unified-docs/deployment/authorization) for advanced security +- See [client integration](/unified-docs/deployment/client-access) for building frontend applications +- Check out [monitoring and observability](/unified-docs/observability-monitoring) for production insights diff --git a/src/content/docs/unified-docs/model-context-protocol.mdx b/src/content/docs/unified-docs/model-context-protocol.mdx new file mode 100644 index 00000000..f6d13d31 --- /dev/null +++ b/src/content/docs/unified-docs/model-context-protocol.mdx @@ -0,0 +1,721 @@ +--- +title: Model Context Protocol (MCP) +description: Learn how to extend Genkit's capabilities using the Model Context Protocol to connect with external tools, resources, and data sources across JavaScript, Go, and Python. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +The Model Context Protocol (MCP) is an open standard that enables AI applications to securely connect with external tools, resources, and data sources. With Genkit's MCP integration, you can: + +- **Access external tools and data** from MCP servers as a client +- **Expose Genkit capabilities** as an MCP server for other applications +- **Build extensible AI workflows** that leverage external services and APIs +- **Create reusable tool ecosystems** that work across different AI applications + +MCP bridges the gap between your AI models and the external world, enabling more powerful and context-aware applications. 
## Core Concepts

### MCP Servers and Clients

- **MCP Server**: Provides tools, resources, and prompts that can be consumed by AI applications
- **MCP Client**: Consumes capabilities from MCP servers (your Genkit application acts as a client)
- **Tools**: Functions that the AI can call to perform actions or retrieve information
- **Resources**: Static or dynamic data sources that provide context to the AI
- **Prompts**: Reusable prompt templates with parameters

### How MCP Enhances AI Workflows

MCP enables your AI applications to:

1. **Access real-time data** from external APIs and databases
2. **Perform actions** in external systems (file operations, API calls, etc.)
3. **Leverage specialized tools** without implementing them from scratch
4. **Share capabilities** between different AI applications
5. **Maintain security** through controlled access to external resources

## Setting Up MCP with Genkit

<Tabs syncKey="language">
  <TabItem label="JavaScript">
    Install the MCP plugin:

    ```bash
    npm install genkit @genkit-ai/mcp
    ```

    Basic setup with multiple MCP servers:

    ```ts
    import { genkit } from 'genkit';
    import { googleAI } from '@genkit-ai/googleai';
    import { createMcpHost } from '@genkit-ai/mcp';

    const ai = genkit({
      plugins: [googleAI()],
    });

    // Create MCP host to manage multiple servers
    const mcpHost = createMcpHost({
      name: 'myMcpClients',
      mcpServers: {
        // Filesystem operations
        fs: {
          command: 'npx',
          args: ['-y', '@modelcontextprotocol/server-filesystem', process.cwd()],
        },
        // Memory/context management
        memory: {
          command: 'npx',
          args: ['-y', '@modelcontextprotocol/server-memory'],
        },
        // Remote HTTP server
        weather: {
          url: 'https://api.weather.com/mcp',
          headers: {
            'Authorization': 'Bearer your-api-key',
          },
        },
      },
    });
    ```
  </TabItem>
  <TabItem label="Go">
    Import the MCP package:

    ```bash
    go get github.com/firebase/genkit/go/plugins/mcp
    ```

    Basic setup:

    ```go
    package main

    import (
      "context"
      "log"
      "github.com/firebase/genkit/go/genkit"
      "github.com/firebase/genkit/go/plugins/mcp"
    )

    func main() {
      ctx := context.Background()
      g, err := genkit.Init(ctx)
      if err != nil {
        log.Fatal(err)
      }

      // Create MCP manager for multiple servers
      manager, err := mcp.NewMCPManager(mcp.MCPManagerOptions{
        Name: "my-app",
        MCPServers: []mcp.MCPServerConfig{
          {
            Name: "filesystem",
            Config: mcp.MCPClientOptions{
              Name: "fs-server",
              Stdio: &mcp.StdioConfig{
                Command: "npx",
                Args: []string{"-y", "@modelcontextprotocol/server-filesystem", "."},
              },
            },
          },
          {
            Name: "time",
            Config: mcp.MCPClientOptions{
              Name: "time-server",
              Stdio: &mcp.StdioConfig{
                Command: "uvx",
                Args: []string{"mcp-server-time"},
              },
            },
          },
        },
      })
      if err != nil {
        log.Fatal(err)
      }

      // Reference g and manager so this snippet compiles on its own;
      // the sections below use them to fetch tools, resources, and prompts.
      _ = g
      _ = manager
    }
    ```
  </TabItem>
  <TabItem label="Python">
    Install the MCP plugin:

    ```bash
    pip install genkit-mcp
    ```

    Basic setup:

    ```python
    from genkit.ai import Genkit
    from genkit.plugins.mcp import MCPPlugin

    # Keep a reference to the plugin so the examples in later sections
    # can call methods like `mcp_plugin.get_active_tools()` on it.
    mcp_plugin = MCPPlugin(
        name="my-mcp-client",
        servers={
            "filesystem": {
                "command": "npx",
                "args": ["-y", "@modelcontextprotocol/server-filesystem", "."],
            },
            "memory": {
                "command": "npx",
                "args": ["-y", "@modelcontextprotocol/server-memory"],
            },
            "weather": {
                "url": "https://api.weather.com/mcp",
                "headers": {
                    "Authorization": "Bearer your-api-key",
                },
            },
        },
    )

    ai = Genkit(plugins=[mcp_plugin])
    ```
  </TabItem>
</Tabs>

## Using MCP in AI Workflows

### Accessing External Tools
+ +MCP servers provide tools that your AI can use just like any other Genkit tool: + + + + ```ts + // Get all available tools from connected MCP servers + const mcpTools = await mcpHost.getActiveTools(ai); + + // Use MCP tools in generation + const response = await ai.generate({ + model: googleAI.model('gemini-2.5-flash'), + prompt: 'Analyze the files in the current directory and remember key findings for later.', + tools: mcpTools, + }); + + // Get tools from specific servers only + const fsTools = await mcpHost.getActiveTools(ai, ['fs']); + const memoryTools = await mcpHost.getActiveTools(ai, ['memory']); + + const response2 = await ai.generate({ + prompt: 'Read the README file and store its contents in memory.', + tools: [...fsTools, ...memoryTools], + }); + ``` + + + ```go + // Get all tools from all connected servers + tools, err := manager.GetActiveTools(ctx, g) + if err != nil { + log.Fatal(err) + } + + // Use tools in generation + resp, err := genkit.Generate(ctx, g, + ai.WithModel(myModel), + ai.WithPrompt("What time is it and what files are in the current directory?"), + ai.WithTools(tools...), + ) + if err != nil { + log.Fatal(err) + } + + // Get tools from a specific server + timeTool, err := manager.GetTool(ctx, g, "time", "get_current_time") + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + # Get all available tools + tools = await mcp_plugin.get_active_tools() + + # Use tools in generation + response = await ai.generate( + model="googleai/gemini-2.5-flash", + prompt="What time is it and what files are in the current directory?", + tools=tools, + ) + + # Get tools from specific servers + fs_tools = await mcp_plugin.get_tools_from_server("filesystem") + memory_tools = await mcp_plugin.get_tools_from_server("memory") + + response = await ai.generate( + prompt="Read files and remember important information", + tools=fs_tools + memory_tools, + ) + ``` + + + +### Working with Resources + +MCP resources provide contextual information that can enhance your AI's understanding: + + + + ```ts + // Get resources from MCP servers + const resources = await mcpHost.getActiveResources(ai); + + // Use resources to provide context + const response = await ai.generate({ + prompt: 'Based on the available system information, recommend optimizations.', + resources: resources, + }); + + // Access specific resources + const systemInfo = await mcpHost.getResource('system', 'system://info'); + ``` + + + ```go + // Get resources from MCP servers + resources, err := manager.GetActiveResources(ctx, g) + if err != nil { + log.Fatal(err) + } + + // Use resources in generation + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt("Analyze the system information and provide recommendations"), + ai.WithResources(resources...), + ) + ``` + + + ```python + # Get resources from MCP servers + resources = await mcp_plugin.get_active_resources() + + # Use resources in generation + response = await ai.generate( + prompt="Analyze the system information and provide recommendations", + resources=resources, + ) + ``` + + + +### Using MCP Prompts + +MCP servers can provide reusable prompt templates: + + + + ```ts + // Get a prompt from an MCP server + const analysisPrompt = await mcpHost.getPrompt('memory', 'analyze_data'); + + // Use the prompt with parameters + const response = await analysisPrompt({ + data: 'user interaction logs', + focus: 'user behavior patterns' + }); + ``` + + + ```go + // Get prompt from specific server + prompt, err := manager.GetPrompt(ctx, g, "memory", "analyze_data", 
map[string]any{ + "data": "user interaction logs", + "focus": "user behavior patterns", + }) + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + # Get prompt from specific server + prompt = await mcp_plugin.get_prompt("memory", "analyze_data") + + # Use the prompt with parameters + response = await prompt({ + "data": "user interaction logs", + "focus": "user behavior patterns" + }) + ``` + + + +## Building Flows with MCP + +### Example: Document Analysis Workflow + + + + ```ts + const documentAnalysisFlow = ai.defineFlow( + { + name: 'documentAnalysis', + inputSchema: z.object({ + directory: z.string(), + analysisType: z.string(), + }), + outputSchema: z.object({ + summary: z.string(), + insights: z.array(z.string()), + recommendations: z.array(z.string()), + }), + }, + async ({ directory, analysisType }) => { + // Get MCP tools for file operations and memory + const tools = await mcpHost.getActiveTools(ai); + + // Step 1: Analyze documents in the directory + const analysisResult = await ai.generate({ + prompt: `Analyze all documents in ${directory} for ${analysisType}. + Read each file and extract key information.`, + tools: tools, + }); + + // Step 2: Store findings in memory for later reference + await ai.generate({ + prompt: `Store the following analysis results in memory: ${analysisResult.text}`, + tools: tools, + }); + + // Step 3: Generate insights and recommendations + const insights = await ai.generate({ + prompt: `Based on the document analysis, provide key insights and actionable recommendations.`, + tools: tools, + }); + + return { + summary: analysisResult.text, + insights: insights.text.split('\n').filter(line => line.trim()), + recommendations: [], // Parse from insights + }; + } + ); + ``` + + + ```go + type DocumentAnalysisInput struct { + Directory string `json:"directory"` + AnalysisType string `json:"analysis_type"` + } + + type DocumentAnalysisOutput struct { + Summary string `json:"summary"` + Insights []string `json:"insights"` + Recommendations []string `json:"recommendations"` + } + + documentAnalysisFlow := genkit.DefineFlow(g, "documentAnalysis", + func(ctx context.Context, input DocumentAnalysisInput) (DocumentAnalysisOutput, error) { + // Get MCP tools + tools, err := manager.GetActiveTools(ctx, g) + if err != nil { + return DocumentAnalysisOutput{}, err + } + + // Analyze documents + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(fmt.Sprintf("Analyze all documents in %s for %s", + input.Directory, input.AnalysisType)), + ai.WithTools(tools...), + ) + if err != nil { + return DocumentAnalysisOutput{}, err + } + + return DocumentAnalysisOutput{ + Summary: resp.Text(), + Insights: []string{}, // Parse from response + Recommendations: []string{}, // Parse from response + }, nil + }) + ``` + + + ```python + @ai.flow() + async def document_analysis_flow(directory: str, analysis_type: str): + # Get MCP tools + tools = await mcp_plugin.get_active_tools() + + # Step 1: Analyze documents + analysis_result = await ai.generate( + prompt=f"Analyze all documents in {directory} for {analysis_type}. 
" + f"Read each file and extract key information.", + tools=tools, + ) + + # Step 2: Store findings in memory + await ai.generate( + prompt=f"Store the following analysis results in memory: {analysis_result.text}", + tools=tools, + ) + + # Step 3: Generate insights + insights = await ai.generate( + prompt="Based on the document analysis, provide key insights and recommendations.", + tools=tools, + ) + + return { + "summary": analysis_result.text, + "insights": insights.text.split('\n'), + "recommendations": [] # Parse from insights + } + ``` + + + +## Advanced MCP Patterns + +### Dynamic Server Management + + + + ```ts + // Connect to servers dynamically based on user needs + const connectWeatherServer = async (apiKey: string) => { + await mcpHost.connect('weather', { + url: 'https://api.weather.com/mcp', + headers: { 'Authorization': `Bearer ${apiKey}` }, + }); + }; + + // Disconnect when no longer needed + const disconnectWeatherServer = async () => { + await mcpHost.disconnect('weather'); + }; + + // Check server status + const activeServers = await mcpHost.getActiveServers(); + console.log('Connected servers:', activeServers); + ``` + + + ```go + // Connect to server dynamically + err = manager.Connect("weather", mcp.MCPClientOptions{ + Name: "weather-server", + HTTP: &mcp.HTTPConfig{ + URL: "https://api.weather.com/mcp", + Headers: map[string]string{ + "Authorization": "Bearer " + apiKey, + }, + }, + }) + if err != nil { + log.Fatal(err) + } + + // Disconnect when done + err = manager.Disconnect("weather") + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + # Connect to server dynamically + await mcp_plugin.connect_server("weather", { + "url": "https://api.weather.com/mcp", + "headers": { + "Authorization": f"Bearer {api_key}", + }, + }) + + # Disconnect server + await mcp_plugin.disconnect_server("weather") + + # Check active servers + active_servers = await mcp_plugin.get_active_servers() + print(f"Active servers: {active_servers}") + ``` + + + +### Error Handling and Resilience + + + + ```ts + const robustMcpFlow = ai.defineFlow( + { + name: 'robustMcpFlow', + inputSchema: z.object({ task: z.string() }), + outputSchema: z.string(), + }, + async ({ task }) => { + try { + // Wait for MCP connections to be ready + await mcpHost.ready(); + + // Get available tools with fallback + const tools = await mcpHost.getActiveTools(ai); + + if (tools.length === 0) { + return 'No MCP tools available, proceeding with basic capabilities.'; + } + + const response = await ai.generate({ + prompt: task, + tools: tools, + }); + + return response.text; + + } catch (error) { + console.error('MCP operation failed:', error); + + // Fallback to basic generation without MCP tools + const fallbackResponse = await ai.generate({ + prompt: `${task} (Note: External tools unavailable)`, + }); + + return fallbackResponse.text; + } finally { + // Clean up resources + await mcpHost.close(); + } + } + ); + ``` + + + ```go + robustMcpFlow := genkit.DefineFlow(g, "robustMcpFlow", + func(ctx context.Context, input struct{ Task string }) (string, error) { + // Graceful shutdown + defer func() { + if err := manager.Close(); err != nil { + log.Printf("Error closing MCP manager: %v", err) + } + }() + + // Try to get MCP tools + tools, err := manager.GetActiveTools(ctx, g) + if err != nil { + log.Printf("Failed to get MCP tools: %v", err) + // Fallback to basic generation + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(input.Task + " (Note: External tools unavailable)"), + ) + if err != nil { + return "", err 
+ } + return resp.Text(), nil + } + + // Use MCP tools + resp, err := genkit.Generate(ctx, g, + ai.WithPrompt(input.Task), + ai.WithTools(tools...), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + }) + ``` + + + ```python + @ai.flow() + async def robust_mcp_flow(task: str) -> str: + try: + # Wait for MCP connections + await mcp_plugin.ready() + + # Get available tools + tools = await mcp_plugin.get_active_tools() + + if not tools: + return "No MCP tools available, proceeding with basic capabilities." + + response = await ai.generate( + prompt=task, + tools=tools, + ) + + return response.text + + except Exception as error: + print(f"MCP operation failed: {error}") + + # Fallback to basic generation + fallback_response = await ai.generate( + prompt=f"{task} (Note: External tools unavailable)", + ) + + return fallback_response.text + + finally: + # Clean up connections + await mcp_plugin.close() + ``` + + + +## Best Practices + +### Security and Trust + +1. **Validate MCP server sources**: Only connect to trusted MCP servers +2. **Sanitize inputs**: Validate all data passed to MCP tools +3. **Limit permissions**: Run MCP servers with minimal required permissions +4. **Monitor resource usage**: Track memory and CPU usage of MCP processes +5. **Use secure transports**: Prefer HTTPS and authenticated connections + +### Performance Optimization + +1. **Connection pooling**: Reuse MCP connections when possible +2. **Lazy loading**: Connect to servers only when needed +3. **Timeout configuration**: Set appropriate timeouts for MCP operations +4. **Resource cleanup**: Always close connections and clean up resources +5. **Caching**: Cache frequently accessed MCP resources + +### Development and Testing + +1. **Use MCP Inspector**: Test your MCP servers with the official inspector tool +2. **Mock MCP servers**: Create mock servers for testing and development +3. **Error simulation**: Test error handling with unreliable connections +4. **Performance testing**: Measure the impact of MCP operations on your flows +5. 
**Documentation**: Document your MCP integrations and dependencies + +## Common MCP Servers + +### Official MCP Servers + +- **@modelcontextprotocol/server-filesystem**: File system operations +- **@modelcontextprotocol/server-memory**: Context and memory management +- **@modelcontextprotocol/server-sqlite**: SQLite database operations +- **@modelcontextprotocol/server-git**: Git repository operations + +### Community MCP Servers + +- **Weather APIs**: Real-time weather data +- **Database connectors**: PostgreSQL, MySQL, MongoDB +- **Cloud services**: AWS, GCP, Azure integrations +- **Development tools**: GitHub, Jira, Slack integrations + +## Next Steps + +- Learn about [tool calling](/unified-docs/tool-calling) to understand how MCP tools integrate with Genkit's tool system +- Explore [creating flows](/unified-docs/creating-flows) to build workflows that leverage MCP capabilities +- See the [MCP Server guide](/unified-docs/mcp-server) for creating your own MCP servers +- Check out the [official MCP documentation](https://modelcontextprotocol.io) for more details on the protocol +- Browse the [MCP server registry](https://github.com/modelcontextprotocol/servers) for available servers diff --git a/src/content/docs/unified-docs/tool-calling.mdx b/src/content/docs/unified-docs/tool-calling.mdx index 09cdb866..1c12804d 100644 --- a/src/content/docs/unified-docs/tool-calling.mdx +++ b/src/content/docs/unified-docs/tool-calling.mdx @@ -786,9 +786,21 @@ apply more complicated logic, you can handle tool calls explicitly: +## Extending Tool Capabilities with MCP + +The [Model Context Protocol (MCP)](/unified-docs/model-context-protocol) provides a powerful way to extend your tool-calling capabilities by connecting to external MCP servers. With MCP, you can: + +- **Access pre-built tools** from the MCP ecosystem without implementing them yourself +- **Connect to external services** like databases, APIs, and file systems +- **Share tools** between different AI applications +- **Build extensible workflows** that leverage community-maintained tools + +MCP tools work seamlessly with Genkit's tool-calling system, allowing you to mix custom tools with external MCP tools in the same generation request. 
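+
+For example, a single `generate()` request can combine a tool you define locally with tools discovered from connected MCP servers. The following sketch assumes an `mcpHost` client configured as described in the MCP guide; `getWeather` is a hypothetical custom tool shown only for illustration:
+
+```ts
+// A custom tool defined directly in your application
+const getWeather = ai.defineTool(
+  {
+    name: 'getWeather',
+    description: 'Gets the current weather for a location',
+    inputSchema: z.object({ location: z.string() }),
+    outputSchema: z.string(),
+  },
+  async ({ location }) => `It is currently sunny in ${location}.`, // stubbed for illustration
+);
+
+// Tools discovered from connected MCP servers
+const mcpTools = await mcpHost.getActiveTools(ai);
+
+// Custom and MCP tools can be passed together in the same request
+const response = await ai.generate({
+  prompt: 'What should I wear in Tokyo today?',
+  tools: [getWeather, ...mcpTools],
+});
+```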
+ ## Next steps -- Learn about [interrupts](/docs/interrupts) to pause tool execution for user interaction -- Explore [retrieval-augmented generation (RAG)](/unified-docs/rag) for handling large amounts of contextual information -- See [multi-agent systems](/docs/multi-agent) for coordinating multiple AI agents with tools -- Check out the [tool calling example](https://github.com/firebase/genkit/tree/main/js/testapps/tool-calling) for a complete implementation +- Learn about [Model Context Protocol (MCP)](/unified-docs/model-context-protocol) to extend your tool capabilities with external servers +- Explore [interrupts](/docs/interrupts) to pause tool execution for user interaction +- See [retrieval-augmented generation (RAG)](/unified-docs/rag) for handling large amounts of contextual information +- Check out [multi-agent systems](/docs/multi-agent) for coordinating multiple AI agents with tools +- Browse the [tool calling example](https://github.com/firebase/genkit/tree/main/js/testapps/tool-calling) for a complete implementation diff --git a/src/content/docs/unified-docs/vector-databases/cloud-firestore.mdx b/src/content/docs/unified-docs/vector-databases/cloud-firestore.mdx new file mode 100644 index 00000000..d2169b3a --- /dev/null +++ b/src/content/docs/unified-docs/vector-databases/cloud-firestore.mdx @@ -0,0 +1,980 @@ +--- +title: Cloud Firestore Vector Search +description: Learn how to use Google Cloud Firestore as a vector database for RAG applications across JavaScript, Go, and Python with Genkit. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +Cloud Firestore provides native vector search capabilities, making it an excellent choice for RAG (Retrieval-Augmented Generation) applications. Firestore's vector search feature allows you to store and query high-dimensional vector embeddings alongside your document data, providing fast and scalable similarity search. + +## Key Features + +- **Native vector search**: Built-in support for high-dimensional vector operations +- **Scalable**: Automatically scales with your application needs +- **Real-time**: Supports real-time updates and queries +- **Integrated**: Part of the Firebase/Google Cloud ecosystem +- **Multi-modal**: Store vectors alongside structured document data + +## Installation and Setup + + + + Install the Firebase plugin: + + ```bash + npm install @genkit-ai/firebase firebase-admin + ``` + + Initialize Firebase Admin SDK: + + ```ts + import { initializeApp } from 'firebase-admin/app'; + import { getFirestore } from 'firebase-admin/firestore'; + + const app = initializeApp({ + projectId: 'your-firebase-project-id', + }); + + const firestore = getFirestore(app); + ``` + + + Install the Firebase plugin: + + ```bash + go get github.com/firebase/genkit/go/plugins/firebase + ``` + + Import and configure: + + ```go + import ( + "github.com/firebase/genkit/go/plugins/firebase" + firebasev4 "firebase.google.com/go/v4" + ) + + // Initialize Firebase + firebasePlugin := &firebase.Firebase{ + ProjectId: "your-firebase-project-id", + } + ``` + + + Install the Firebase plugin: + + ```bash + pip install genkit-plugin-firebase google-cloud-firestore + ``` + + Initialize Firestore client: + + ```python + from google.cloud import firestore + from genkit.plugins.firebase.firestore import FirestoreVectorStore + + # Initialize Firestore client + firestore_client = firestore.Client(project="your-firebase-project-id") + ``` + + + +## Prerequisites + +### Firebase Project Setup + +1. 
**Create a Firebase project** at [Firebase Console](https://console.firebase.google.com/) +2. **Enable Firestore** in your project: + - Go to Firestore Database in the Firebase console + - Click "Create database" + - Choose your security rules and location +3. **Upgrade to Blaze plan** (required for vector search features) + +### Authentication + + + + Set up authentication using one of these methods: + + **Option 1: Service Account Key** + ```bash + export GOOGLE_APPLICATION_CREDENTIALS="path/to/serviceAccountKey.json" + ``` + + **Option 2: Service Account Credentials (Environment Variable)** + ```bash + export GCLOUD_SERVICE_ACCOUNT_CREDS='{"type":"service_account",...}' + ``` + + **Option 3: Application Default Credentials** + ```bash + gcloud auth application-default login + ``` + + If using `GCLOUD_SERVICE_ACCOUNT_CREDS`, configure Firestore explicitly: + + ```ts + import { initializeApp } from 'firebase-admin/app'; + import { getFirestore } from 'firebase-admin/firestore'; + + const app = initializeApp(); + let firestore = getFirestore(app); + + if (process.env.GCLOUD_SERVICE_ACCOUNT_CREDS) { + const serviceAccountCreds = JSON.parse(process.env.GCLOUD_SERVICE_ACCOUNT_CREDS); + const authOptions = { credentials: serviceAccountCreds }; + firestore.settings(authOptions); + } + ``` + + + **Local Development:** + ```bash + firebase login + firebase use your-project-id + ``` + + **Production:** + ```bash + export GOOGLE_APPLICATION_CREDENTIALS="path/to/serviceAccountKey.json" + ``` + + **Custom Firebase App:** + ```go + import "google.golang.org/api/option" + + app, err := firebasev4.NewApp(ctx, &firebasev4.Config{ + ProjectID: "your-project-id", + }, option.WithCredentialsFile("path/to/serviceAccountKey.json")) + + firebasePlugin := &firebase.Firebase{ + App: app, + } + ``` + + + **Local Development:** + ```bash + gcloud auth application-default login + gcloud config set project your-project-id + ``` + + **Production:** + ```bash + export GOOGLE_APPLICATION_CREDENTIALS="path/to/serviceAccountKey.json" + ``` + + **Custom Configuration:** + ```python + from google.cloud import firestore + from google.oauth2 import service_account + + credentials = service_account.Credentials.from_service_account_file( + "path/to/serviceAccountKey.json" + ) + firestore_client = firestore.Client( + project="your-project-id", + credentials=credentials + ) + ``` + + + +## Basic Usage + +### Defining a Firestore Retriever + + + + ```ts + import { genkit } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import { defineFirestoreRetriever } from '@genkit-ai/firebase'; + import { initializeApp } from 'firebase-admin/app'; + import { getFirestore } from 'firebase-admin/firestore'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + const app = initializeApp(); + const firestore = getFirestore(app); + + const retriever = defineFirestoreRetriever(ai, { + name: 'documentRetriever', + firestore, + collection: 'documents', + contentField: 'text', + vectorField: 'embedding', + embedder: googleAI.embedder('text-embedding-004'), + distanceMeasure: 'COSINE', // Options: 'COSINE', 'EUCLIDEAN', 'DOT_PRODUCT' + }); + ``` + + + ```go + import ( + "github.com/firebase/genkit/go/genkit" + "github.com/firebase/genkit/go/plugins/firebase" + "github.com/firebase/genkit/go/plugins/googlegenai" + ) + + func main() { + ctx := context.Background() + + // Initialize plugins + firebasePlugin := &firebase.Firebase{ + ProjectId: "your-firebase-project-id", + } + + googleAIPlugin := &googlegenai.GoogleAI{ + APIKey: 
"your-api-key", + } + + g, err := genkit.Init(ctx, genkit.WithPlugins(firebasePlugin, googleAIPlugin)) + if err != nil { + log.Fatal(err) + } + + // Define retriever + retriever, err := firebase.DefineRetriever(ctx, g, firebase.RetrieverOptions{ + Name: "documentRetriever", + Collection: "documents", + VectorField: "embedding", + EmbedderName: "googleai/text-embedding-004", + TopK: 10, + }) + if err != nil { + log.Fatal(err) + } + } + ``` + + + ```python + from genkit.ai import Genkit + from genkit.plugins.firebase.firestore import FirestoreVectorStore + from genkit.plugins.google_genai import GoogleGenAI + from google.cloud import firestore + + # Initialize Firestore client + firestore_client = firestore.Client() + + ai = Genkit( + plugins=[ + GoogleGenAI(), + FirestoreVectorStore( + name='documentRetriever', + collection='documents', + vector_field='embedding', + content_field='text', + embedder='googleai/text-embedding-004', + firestore_client=firestore_client, + ), + ] + ) + ``` + + + +### Retrieving Documents + + + + ```ts + // Basic retrieval + const docs = await ai.retrieve({ + retriever, + query: 'What is machine learning?', + options: { + limit: 5, + }, + }); + + // Retrieval with filters + const filteredDocs = await ai.retrieve({ + retriever, + query: 'artificial intelligence concepts', + options: { + limit: 10, + where: { + category: 'technology', + status: 'published' + }, + collection: 'alternativeCollection', // Override default collection + }, + }); + + console.log('Retrieved documents:', docs); + ``` + + + ```go + // Basic retrieval + results, err := ai.Retrieve(ctx, retriever, ai.WithDocs("What is machine learning?")) + if err != nil { + log.Fatal(err) + } + + // Use retrieved documents in generation + var contextDocs []string + for _, doc := range results.Documents { + contextDocs = append(contextDocs, doc.Content[0].Text) + } + + context := strings.Join(contextDocs, "\n\n") + resp, err := genkit.Generate(ctx, g, + ai.WithModel(googleAIPlugin.Model(g, "gemini-1.5-flash")), + ai.WithPrompt(fmt.Sprintf("Context: %s\n\nQuestion: %s", + context, "What is machine learning?")), + ) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("Answer: %s\n", resp.Text()) + ``` + + + ```python + from genkit.ai import Document + + async def retrieve_documents(): + # Create query document + query_doc = Document.from_text("What is machine learning?") + + # Retrieve documents + results = await ai.retrieve( + query=query_doc, + retriever='documentRetriever', + ) + + return results + + # Use in RAG workflow + async def rag_query(question: str): + query_doc = Document.from_text(question) + + # Retrieve relevant documents + retrieved_docs = await ai.retrieve( + query=query_doc, + retriever='documentRetriever', + ) + + # Prepare context + context = "\n\n".join([doc.content[0].text for doc in retrieved_docs]) + + # Generate answer + response = await ai.generate( + model="googleai/gemini-1.5-flash", + prompt=f"Context: {context}\n\nQuestion: {question}\n\nAnswer:", + ) + + return response.text + + # Example usage + # import asyncio + # answer = asyncio.run(rag_query("What is machine learning?")) + # print(answer) + ``` + + + +## Data Indexing + +### Document Structure + +Your Firestore documents should follow this structure: + +```json +{ + "text": "Your document content here...", + "embedding": [0.1, -0.2, 0.3, ...], + "metadata": { + "title": "Document Title", + "category": "Technology", + "author": "Author Name", + "timestamp": "2024-01-15T10:30:00Z" + } +} +``` + +### Populating the Index + + 
+ + ```ts + import { chunk } from 'llm-chunk'; + import { FieldValue } from 'firebase-admin/firestore'; + import pdf from 'pdf-parse'; + import { readFile } from 'fs/promises'; + + const indexConfig = { + collection: 'documents', + contentField: 'text', + vectorField: 'embedding', + embedder: googleAI.embedder('text-embedding-004'), + }; + + export async function indexDocuments(filePath: string) { + // Extract text from PDF + const pdfFile = await readFile(filePath); + const data = await pdf(pdfFile); + const pdfText = data.text; + + // Chunk the text + const chunks = await chunk(pdfText); + + // Index each chunk + for (const text of chunks) { + const embedding = (await ai.embed({ + embedder: indexConfig.embedder, + content: text, + }))[0].embedding; + + await firestore.collection(indexConfig.collection).add({ + [indexConfig.vectorField]: FieldValue.vector(embedding), + [indexConfig.contentField]: text, + metadata: { + source: filePath, + timestamp: new Date().toISOString(), + }, + }); + } + } + + // Batch indexing for better performance + export async function batchIndexDocuments(documents: string[]) { + const batch = firestore.batch(); + + for (const text of documents) { + const embedding = (await ai.embed({ + embedder: indexConfig.embedder, + content: text, + }))[0].embedding; + + const docRef = firestore.collection(indexConfig.collection).doc(); + batch.set(docRef, { + [indexConfig.vectorField]: FieldValue.vector(embedding), + [indexConfig.contentField]: text, + metadata: { + timestamp: new Date().toISOString(), + }, + }); + } + + await batch.commit(); + } + ``` + + + ```go + import ( + "context" + "fmt" + "log" + "time" + "github.com/firebase/genkit/go/ai" + firebasev4 "firebase.google.com/go/v4" + ) + + func indexDocuments(ctx context.Context, g *genkit.Genkit, documents []string) error { + // Get Firestore client + app, err := firebasev4.NewApp(ctx, &firebasev4.Config{ + ProjectID: "your-project-id", + }) + if err != nil { + return err + } + + client, err := app.Firestore(ctx) + if err != nil { + return err + } + defer client.Close() + + // Get embedder + embedder := googleAIPlugin.Embedder(g, "text-embedding-004") + + for i, text := range documents { + // Generate embedding + embeddingResp, err := ai.Embed(ctx, embedder, ai.WithDocs(text)) + if err != nil { + return fmt.Errorf("failed to generate embedding: %w", err) + } + + // Store in Firestore + docData := map[string]interface{}{ + "text": text, + "embedding": embeddingResp.Embeddings[0].Embedding, + "metadata": map[string]interface{}{ + "index": i, + "timestamp": time.Now().Format(time.RFC3339), + }, + } + + _, err = client.Collection("documents").Doc(fmt.Sprintf("doc-%d", i)).Set(ctx, docData) + if err != nil { + return fmt.Errorf("failed to store document: %w", err) + } + } + + return nil + } + + // Batch indexing + func batchIndexDocuments(ctx context.Context, g *genkit.Genkit, documents []string) error { + app, err := firebasev4.NewApp(ctx, &firebasev4.Config{ + ProjectID: "your-project-id", + }) + if err != nil { + return err + } + + client, err := app.Firestore(ctx) + if err != nil { + return err + } + defer client.Close() + + batch := client.Batch() + embedder := googleAIPlugin.Embedder(g, "text-embedding-004") + + for i, text := range documents { + embeddingResp, err := ai.Embed(ctx, embedder, ai.WithDocs(text)) + if err != nil { + return err + } + + docRef := client.Collection("documents").Doc(fmt.Sprintf("doc-%d", i)) + batch.Set(docRef, map[string]interface{}{ + "text": text, + "embedding": 
embeddingResp.Embeddings[0].Embedding, + "metadata": map[string]interface{}{ + "index": i, + "timestamp": time.Now().Format(time.RFC3339), + }, + }) + } + + _, err = batch.Commit(ctx) + return err + } + ``` + + + ```python + from genkit.ai import Document + from genkit.types import TextPart + from google.cloud import firestore + import asyncio + + async def index_documents(ai: Genkit, documents: list[str], collection_name: str): + """Index documents in Firestore with embeddings.""" + + # Prepare documents for embedding + genkit_documents = [Document(content=[TextPart(text=doc)]) for doc in documents] + + # Generate embeddings + embed_response = await ai.embed( + embedder='googleai/text-embedding-004', + content=genkit_documents + ) + embeddings = [emb.embedding for emb in embed_response.embeddings] + + # Get Firestore client + firestore_client = firestore.Client() + + # Index each document + for i, document_text in enumerate(documents): + doc_id = f'doc-{i + 1}' + embedding = embeddings[i] + + doc_ref = firestore_client.collection(collection_name).document(doc_id) + doc_ref.set({ + 'text': document_text, + 'embedding': embedding, + 'metadata': { + 'index': i, + 'timestamp': firestore.SERVER_TIMESTAMP, + }, + }) + print(f"Indexed document {doc_id}") + + # Batch indexing for better performance + async def batch_index_documents(ai: Genkit, documents: list[str], collection_name: str): + """Batch index documents for better performance.""" + + genkit_documents = [Document(content=[TextPart(text=doc)]) for doc in documents] + embed_response = await ai.embed( + embedder='googleai/text-embedding-004', + content=genkit_documents + ) + embeddings = [emb.embedding for emb in embed_response.embeddings] + + firestore_client = firestore.Client() + batch = firestore_client.batch() + + for i, document_text in enumerate(documents): + doc_ref = firestore_client.collection(collection_name).document(f'doc-{i + 1}') + batch.set(doc_ref, { + 'text': document_text, + 'embedding': embeddings[i], + 'metadata': { + 'index': i, + 'timestamp': firestore.SERVER_TIMESTAMP, + }, + }) + + # Commit batch + batch.commit() + print(f"Batch indexed {len(documents)} documents") + + # Example usage + # documents = [ + # "Machine learning is a subset of artificial intelligence...", + # "Deep learning uses neural networks with multiple layers...", + # "Natural language processing enables computers to understand text...", + # ] + # asyncio.run(index_documents(ai, documents, 'documents')) + ``` + + + +## Creating Vector Indexes + +Firestore requires vector indexes for efficient similarity search. 
Create the index using the `gcloud` CLI: + + + + ```bash + # For text-embedding-004 (768 dimensions) + gcloud alpha firestore indexes composite create \ + --project=your-firebase-project-id \ + --collection-group=documents \ + --query-scope=COLLECTION \ + --field-config=vector-config='{"dimension":"768","flat": "{}"}',field-path=embedding + + # For other embedding models, adjust the dimension: + # text-embedding-3-small: 1536 dimensions + # text-embedding-3-large: 3072 dimensions + ``` + + You can also let Firestore suggest the command by making a query first: + + ```ts + try { + const docs = await ai.retrieve({ + retriever, + query: 'test query', + }); + } catch (error) { + // Firestore will throw an error with the exact command needed + console.error('Index required:', error.message); + } + ``` + + + ```bash + # Create vector index for your collection + gcloud alpha firestore indexes composite create \ + --project=your-firebase-project-id \ + --collection-group=documents \ + --query-scope=COLLECTION \ + --field-config=vector-config='{"dimension":"768","flat": "{}"}',field-path=embedding + ``` + + Check index creation status: + + ```bash + gcloud firestore indexes composite list --project=your-firebase-project-id + ``` + + + ```bash + # Create the vector index + gcloud firestore indexes composite create \ + --project=your-firebase-project-id \ + --collection-group=documents \ + --query-scope=COLLECTION \ + --field-config=vector-config='{"dimension":"768","flat": "{}"}',field-path=embedding + ``` + + Common embedding dimensions: + - **text-embedding-004**: 768 dimensions + - **text-embedding-3-small**: 1536 dimensions + - **text-embedding-3-large**: 3072 dimensions + + You can also trigger index creation by making a query: + + ```python + try: + query_doc = Document.from_text("test query") + results = await ai.retrieve( + query=query_doc, + retriever='documentRetriever', + ) + except Exception as error: + # Error message will contain the exact gcloud command needed + print(f"Index required: {error}") + ``` + + + +## Advanced Configuration + +### Retrieval Options + + + + ```ts + const retriever = defineFirestoreRetriever(ai, { + name: 'advancedRetriever', + firestore, + collection: 'documents', + contentField: 'text', + vectorField: 'embedding', + embedder: googleAI.embedder('text-embedding-004'), + distanceMeasure: 'COSINE', // 'COSINE', 'EUCLIDEAN', 'DOT_PRODUCT' + }); + + // Advanced retrieval with options + const docs = await ai.retrieve({ + retriever, + query: 'machine learning algorithms', + options: { + limit: 10, // Maximum number of results + where: { // Filter conditions + category: 'technology', + status: 'published', + 'metadata.author': 'John Doe' + }, + collection: 'tech_docs', // Override default collection + }, + }); + ``` + + + ```go + // Advanced retriever configuration + retriever, err := firebase.DefineRetriever(ctx, g, firebase.RetrieverOptions{ + Name: "advancedRetriever", + Collection: "documents", + VectorField: "embedding", + EmbedderName: "googleai/text-embedding-004", + TopK: 10, + // Additional configuration options + }) + + // Use with filtering (implementation depends on Firebase Go SDK capabilities) + results, err := ai.Retrieve(ctx, retriever, ai.WithDocs("machine learning")) + if err != nil { + log.Fatal(err) + } + + // Process results + for _, doc := range results.Documents { + fmt.Printf("Document: %s\n", doc.Content[0].Text) + } + ``` + + + ```python + # Advanced configuration + ai = Genkit( + plugins=[ + GoogleGenAI(), + FirestoreVectorStore( + 
name='advancedRetriever', + collection='documents', + vector_field='embedding', + content_field='text', + embedder='googleai/text-embedding-004', + firestore_client=firestore_client, + # Additional configuration options can be added here + ), + ] + ) + + # Advanced retrieval with custom logic + async def advanced_retrieve(query: str, filters: dict = None): + query_doc = Document.from_text(query) + + # Basic retrieval + results = await ai.retrieve( + query=query_doc, + retriever='advancedRetriever', + ) + + # Apply additional filtering if needed + if filters: + # Custom filtering logic here + pass + + return results + ``` + + + +### Multiple Collections + + + + ```ts + // Define multiple retrievers for different collections + const techRetriever = defineFirestoreRetriever(ai, { + name: 'techDocuments', + firestore, + collection: 'tech_docs', + contentField: 'content', + vectorField: 'embedding', + embedder: googleAI.embedder('text-embedding-004'), + }); + + const generalRetriever = defineFirestoreRetriever(ai, { + name: 'generalDocuments', + firestore, + collection: 'general_docs', + contentField: 'text', + vectorField: 'embedding', + embedder: googleAI.embedder('text-embedding-004'), + }); + + // Use different retrievers based on query type + async function smartRetrieve(query: string, domain: string) { + const retriever = domain === 'tech' ? techRetriever : generalRetriever; + + return await ai.retrieve({ + retriever, + query, + options: { limit: 5 }, + }); + } + ``` + + + ```go + // Define multiple retrievers + techRetriever, err := firebase.DefineRetriever(ctx, g, firebase.RetrieverOptions{ + Name: "techDocuments", + Collection: "tech_docs", + VectorField: "embedding", + EmbedderName: "googleai/text-embedding-004", + TopK: 5, + }) + + generalRetriever, err := firebase.DefineRetriever(ctx, g, firebase.RetrieverOptions{ + Name: "generalDocuments", + Collection: "general_docs", + VectorField: "embedding", + EmbedderName: "googleai/text-embedding-004", + TopK: 10, + }) + + // Smart retrieval function + func smartRetrieve(ctx context.Context, query, domain string) (*ai.RetrieveResponse, error) { + var retriever ai.Retriever + + switch domain { + case "tech": + retriever = techRetriever + default: + retriever = generalRetriever + } + + return ai.Retrieve(ctx, retriever, ai.WithDocs(query)) + } + ``` + + + ```python + # Define multiple retrievers + ai = Genkit( + plugins=[ + GoogleGenAI(), + FirestoreVectorStore( + name='techDocuments', + collection='tech_docs', + vector_field='embedding', + content_field='content', + embedder='googleai/text-embedding-004', + firestore_client=firestore_client, + ), + FirestoreVectorStore( + name='generalDocuments', + collection='general_docs', + vector_field='embedding', + content_field='text', + embedder='googleai/text-embedding-004', + firestore_client=firestore_client, + ), + ] + ) + + async def smart_retrieve(query: str, domain: str = 'general'): + """Retrieve from different collections based on domain.""" + retriever_name = 'techDocuments' if domain == 'tech' else 'generalDocuments' + + query_doc = Document.from_text(query) + return await ai.retrieve( + query=query_doc, + retriever=retriever_name, + ) + ``` + + + +## Best Practices + +### Performance Optimization + +1. **Batch Operations**: Use batch writes when indexing multiple documents +2. **Appropriate Chunking**: Split large documents into optimal chunk sizes (500-1000 tokens) +3. **Index Management**: Create indexes before querying to avoid errors +4. 
**Caching**: Implement caching for frequently accessed documents
+
+### Security
+
+1. **Firestore Rules**: Configure proper security rules for your collections:
+   ```javascript
+   // Example Firestore security rules
+   rules_version = '2';
+   service cloud.firestore {
+     match /databases/{database}/documents {
+       match /documents/{document} {
+         allow read, write: if request.auth != null;
+       }
+     }
+   }
+   ```
+
+2. **API Key Management**: Never expose API keys in client-side code
+3. **Authentication**: Implement proper user authentication for sensitive data
+
+### Cost Management
+
+1. **Document Size**: Keep documents reasonably sized to minimize read costs
+2. **Query Optimization**: Design efficient queries to reduce operation costs
+3. **Storage Management**: Regularly clean up unused documents and embeddings
+4. **Index Strategy**: Only create necessary indexes to minimize storage costs
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Index Not Found Error**:
+   ```bash
+   # Create the required index
+   gcloud alpha firestore indexes composite create \
+     --project=your-project-id \
+     --collection-group=your-collection \
+     --query-scope=COLLECTION \
+     --field-config=vector-config='{"dimension":"768","flat": "{}"}',field-path=embedding
+   ```

+
+2. **Authentication Errors**:
+   - Ensure `GOOGLE_APPLICATION_CREDENTIALS` is set correctly
+   - Verify Firebase project permissions
+   - Check that the service account has Firestore access
+
+3. **Dimension Mismatch**:
+   - Ensure your embedding model's output dimension matches the dimension configured on the vector index (for example, 768 for `text-embedding-004`)
+   - If you switch to an embedding model with a different dimension, delete and re-create the vector index
diff --git a/src/content/docs/unified-docs/vector-databases/dev-local-vectorstore.mdx b/src/content/docs/unified-docs/vector-databases/dev-local-vectorstore.mdx
new file mode 100644
index 00000000..418d90eb
--- /dev/null
+++ b/src/content/docs/unified-docs/vector-databases/dev-local-vectorstore.mdx
@@ -0,0 +1,728 @@
+---
+title: Dev Local Vector Store
+description: Learn how to use the Dev Local Vector Store for local development and testing across JavaScript, Go, and Python with Genkit.
+---
+
+import { Tabs, TabItem } from '@astrojs/starlight/components';
+
+The Dev Local Vector Store provides a simple, file-based vector database solution for local development and testing. It's designed to be lightweight and easy to set up, making it perfect for prototyping, testing, and development environments where you don't need the complexity of a full production vector database.
+
+## Key Features
+
+- **Zero setup**: No external dependencies or services required
+- **File-based storage**: Stores vectors and metadata locally
+- **Development focused**: Optimized for quick iteration and testing
+- **Lightweight**: Minimal resource usage
+- **Cross-platform**: Works on any system with file system access
+
+:::caution
+The Dev Local Vector Store is intended for development and testing only. For production applications, use a dedicated vector database like Pinecone, ChromaDB, or Cloud Firestore.
+::: + +## Installation and Setup + + + + JavaScript doesn't have a dedicated dev local vector store plugin, but you can create a simple in-memory or file-based solution for development: + + ```bash + npm install genkit + ``` + + Create a simple local vector store: + + ```ts + import { genkit } from 'genkit'; + import { googleAI } from '@genkit-ai/googleai'; + import fs from 'fs/promises'; + import path from 'path'; + + const ai = genkit({ + plugins: [googleAI()], + }); + + // Simple local vector store implementation + class DevLocalVectorStore { + private storePath: string; + private embedder: any; + + constructor(storePath: string, embedder: any) { + this.storePath = storePath; + this.embedder = embedder; + } + + async index(documents: string[]) { + const embeddings = await Promise.all( + documents.map(async (doc) => { + const result = await ai.embed({ + embedder: this.embedder, + content: doc, + }); + return { + text: doc, + embedding: result[0].embedding, + }; + }) + ); + + await fs.writeFile( + this.storePath, + JSON.stringify(embeddings, null, 2) + ); + } + + async search(query: string, limit: number = 5) { + const queryEmbedding = await ai.embed({ + embedder: this.embedder, + content: query, + }); + + const data = JSON.parse(await fs.readFile(this.storePath, 'utf-8')); + + // Simple cosine similarity + const results = data + .map((item: any) => ({ + ...item, + similarity: this.cosineSimilarity( + queryEmbedding[0].embedding, + item.embedding + ), + })) + .sort((a: any, b: any) => b.similarity - a.similarity) + .slice(0, limit); + + return results; + } + + private cosineSimilarity(a: number[], b: number[]): number { + const dotProduct = a.reduce((sum, val, i) => sum + val * b[i], 0); + const magnitudeA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0)); + const magnitudeB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0)); + return dotProduct / (magnitudeA * magnitudeB); + } + } + + // Usage + const localStore = new DevLocalVectorStore( + './dev-vector-store.json', + googleAI.embedder('text-embedding-004') + ); + ``` + + + Go doesn't have a dedicated dev local vector store plugin, but you can create a simple file-based solution: + + ```bash + go get github.com/firebase/genkit/go/genkit + ``` + + Create a simple local vector store: + + ```go + package main + + import ( + "context" + "encoding/json" + "fmt" + "math" + "os" + "sort" + + "github.com/firebase/genkit/go/ai" + "github.com/firebase/genkit/go/genkit" + ) + + type VectorDocument struct { + Text string `json:"text"` + Embedding []float64 `json:"embedding"` + } + + type DevLocalVectorStore struct { + storePath string + embedder ai.Embedder + } + + func NewDevLocalVectorStore(storePath string, embedder ai.Embedder) *DevLocalVectorStore { + return &DevLocalVectorStore{ + storePath: storePath, + embedder: embedder, + } + } + + func (store *DevLocalVectorStore) Index(ctx context.Context, documents []string) error { + var vectorDocs []VectorDocument + + for _, doc := range documents { + resp, err := ai.Embed(ctx, store.embedder, ai.WithDocs(doc)) + if err != nil { + return err + } + + vectorDocs = append(vectorDocs, VectorDocument{ + Text: doc, + Embedding: resp.Embeddings[0].Embedding, + }) + } + + data, err := json.MarshalIndent(vectorDocs, "", " ") + if err != nil { + return err + } + + return os.WriteFile(store.storePath, data, 0644) + } + + func (store *DevLocalVectorStore) Search(ctx context.Context, query string, limit int) ([]VectorDocument, error) { + // Get query embedding + queryResp, err := ai.Embed(ctx, 
store.embedder, ai.WithDocs(query)) + if err != nil { + return nil, err + } + queryEmbedding := queryResp.Embeddings[0].Embedding + + // Load stored documents + data, err := os.ReadFile(store.storePath) + if err != nil { + return nil, err + } + + var docs []VectorDocument + if err := json.Unmarshal(data, &docs); err != nil { + return nil, err + } + + // Calculate similarities and sort + type result struct { + doc VectorDocument + similarity float64 + } + + var results []result + for _, doc := range docs { + similarity := cosineSimilarity(queryEmbedding, doc.Embedding) + results = append(results, result{doc: doc, similarity: similarity}) + } + + sort.Slice(results, func(i, j int) bool { + return results[i].similarity > results[j].similarity + }) + + // Return top results + if limit > len(results) { + limit = len(results) + } + + var topDocs []VectorDocument + for i := 0; i < limit; i++ { + topDocs = append(topDocs, results[i].doc) + } + + return topDocs, nil + } + + func cosineSimilarity(a, b []float64) float64 { + var dotProduct, magnitudeA, magnitudeB float64 + + for i := range a { + dotProduct += a[i] * b[i] + magnitudeA += a[i] * a[i] + magnitudeB += b[i] * b[i] + } + + magnitudeA = math.Sqrt(magnitudeA) + magnitudeB = math.Sqrt(magnitudeB) + + if magnitudeA == 0 || magnitudeB == 0 { + return 0 + } + + return dotProduct / (magnitudeA * magnitudeB) + } + ``` + + + Install the Dev Local Vector Store plugin: + + ```bash + pip install genkit-plugin-dev-local-vectorstore + ``` + + Configure the plugin: + + ```python + from genkit.ai import Genkit + from genkit.plugins.dev_local_vectorstore import DevLocalVectorStore + from genkit.plugins.google_genai import GoogleGenAI + + ai = Genkit( + plugins=[ + GoogleGenAI(), + DevLocalVectorStore( + name='my_vectorstore', + embedder='googleai/text-embedding-004', + ), + ], + ) + ``` + + + +## Basic Usage + +### Indexing Documents + + + + ```ts + // Index documents in the local store + const documents = [ + 'Machine learning is a subset of artificial intelligence.', + 'Deep learning uses neural networks with multiple layers.', + 'Natural language processing enables computers to understand text.', + 'Computer vision allows machines to interpret visual information.', + ]; + + await localStore.index(documents); + console.log('Documents indexed successfully'); + ``` + + + ```go + // Initialize the local store + embedder := googleAIPlugin.Embedder(g, "text-embedding-004") + localStore := NewDevLocalVectorStore("./dev-vector-store.json", embedder) + + // Index documents + documents := []string{ + "Machine learning is a subset of artificial intelligence.", + "Deep learning uses neural networks with multiple layers.", + "Natural language processing enables computers to understand text.", + "Computer vision allows machines to interpret visual information.", + } + + err := localStore.Index(ctx, documents) + if err != nil { + log.Fatal(err) + } + fmt.Println("Documents indexed successfully") + ``` + + + ```python + from genkit.types import Document + + # Prepare documents + data_list = [ + 'Machine learning is a subset of artificial intelligence.', + 'Deep learning uses neural networks with multiple layers.', + 'Natural language processing enables computers to understand text.', + 'Computer vision allows machines to interpret visual information.', + ] + + # Convert to Genkit documents + genkit_docs = [Document.from_text(text=item) for item in data_list] + + # Index documents + await DevLocalVectorStore.index('my_vectorstore', genkit_docs) + print("Documents 
indexed successfully") + ``` + + + +### Retrieving Documents + + + + ```ts + // Search for similar documents + const query = 'What is artificial intelligence?'; + const results = await localStore.search(query, 3); + + console.log('Search results:'); + results.forEach((result, index) => { + console.log(`${index + 1}. ${result.text} (similarity: ${result.similarity.toFixed(3)})`); + }); + + // Use in RAG workflow + async function ragQuery(question: string) { + const retrievedDocs = await localStore.search(question, 3); + const context = retrievedDocs.map(doc => doc.text).join('\n\n'); + + const response = await ai.generate({ + model: googleAI.model('gemini-1.5-flash'), + prompt: `Context: ${context}\n\nQuestion: ${question}\n\nAnswer:`, + }); + + return response.text; + } + + const answer = await ragQuery('What is machine learning?'); + console.log('Answer:', answer); + ``` + + + ```go + // Search for similar documents + query := "What is artificial intelligence?" + results, err := localStore.Search(ctx, query, 3) + if err != nil { + log.Fatal(err) + } + + fmt.Println("Search results:") + for i, result := range results { + fmt.Printf("%d. %s\n", i+1, result.Text) + } + + // Use in RAG workflow + func ragQuery(ctx context.Context, question string) (string, error) { + retrievedDocs, err := localStore.Search(ctx, question, 3) + if err != nil { + return "", err + } + + var contextParts []string + for _, doc := range retrievedDocs { + contextParts = append(contextParts, doc.Text) + } + context := strings.Join(contextParts, "\n\n") + + resp, err := genkit.Generate(ctx, g, + ai.WithModel(googleAIPlugin.Model(g, "gemini-1.5-flash")), + ai.WithPrompt(fmt.Sprintf("Context: %s\n\nQuestion: %s\n\nAnswer:", context, question)), + ) + if err != nil { + return "", err + } + + return resp.Text(), nil + } + + answer, err := ragQuery(ctx, "What is machine learning?") + if err != nil { + log.Fatal(err) + } + fmt.Printf("Answer: %s\n", answer) + ``` + + + ```python + from genkit.types import Document + + # Search for similar documents + async def search_documents(query: str): + query_doc = Document.from_text(query) + + results = await ai.retrieve( + query=query_doc, + retriever='my_vectorstore', + ) + + return results + + # Use in RAG workflow + async def rag_query(question: str): + query_doc = Document.from_text(question) + + # Retrieve relevant documents + retrieved_docs = await ai.retrieve( + query=query_doc, + retriever='my_vectorstore', + ) + + # Prepare context + context = "\n\n".join([doc.content[0].text for doc in retrieved_docs]) + + # Generate answer + response = await ai.generate( + model="googleai/gemini-1.5-flash", + prompt=f"Context: {context}\n\nQuestion: {question}\n\nAnswer:", + ) + + return response.text + + # Example usage + results = await search_documents("What is artificial intelligence?") + print("Search results:") + for i, doc in enumerate(results, 1): + print(f"{i}. 
{doc.content[0].text}") + + answer = await rag_query("What is machine learning?") + print(f"Answer: {answer}") + ``` + + + +## Advanced Configuration + +### Custom Storage Location + + + + ```ts + // Custom storage path + const customStore = new DevLocalVectorStore( + './data/custom-vector-store.json', + googleAI.embedder('text-embedding-004') + ); + + // Multiple stores for different domains + const techStore = new DevLocalVectorStore( + './data/tech-docs.json', + googleAI.embedder('text-embedding-004') + ); + + const generalStore = new DevLocalVectorStore( + './data/general-docs.json', + googleAI.embedder('text-embedding-004') + ); + ``` + + + ```go + // Custom storage paths + techStore := NewDevLocalVectorStore("./data/tech-docs.json", embedder) + generalStore := NewDevLocalVectorStore("./data/general-docs.json", embedder) + + // Index different types of content + techDocs := []string{ + "API documentation for REST endpoints", + "Database schema design principles", + "Microservices architecture patterns", + } + + generalDocs := []string{ + "Company policies and procedures", + "Meeting notes and summaries", + "Project planning documents", + } + + err := techStore.Index(ctx, techDocs) + if err != nil { + log.Fatal(err) + } + + err = generalStore.Index(ctx, generalDocs) + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + # Multiple vector stores for different domains + ai = Genkit( + plugins=[ + GoogleGenAI(), + DevLocalVectorStore( + name='tech_docs', + embedder='googleai/text-embedding-004', + ), + DevLocalVectorStore( + name='general_docs', + embedder='googleai/text-embedding-004', + ), + ], + ) + + # Index different types of content + tech_docs = [ + Document.from_text("API documentation for REST endpoints"), + Document.from_text("Database schema design principles"), + Document.from_text("Microservices architecture patterns"), + ] + + general_docs = [ + Document.from_text("Company policies and procedures"), + Document.from_text("Meeting notes and summaries"), + Document.from_text("Project planning documents"), + ] + + await DevLocalVectorStore.index('tech_docs', tech_docs) + await DevLocalVectorStore.index('general_docs', general_docs) + ``` + + + +### Batch Operations + + + + ```ts + // Batch indexing for better performance + async function batchIndex(documents: string[], batchSize: number = 10) { + for (let i = 0; i < documents.length; i += batchSize) { + const batch = documents.slice(i, i + batchSize); + await localStore.index(batch); + console.log(`Indexed batch ${Math.floor(i / batchSize) + 1}`); + } + } + + // Large document set + const largeDocumentSet = [ + // ... hundreds of documents + ]; + + await batchIndex(largeDocumentSet, 20); + ``` + + + ```go + // Batch indexing function + func batchIndex(ctx context.Context, store *DevLocalVectorStore, documents []string, batchSize int) error { + for i := 0; i < len(documents); i += batchSize { + end := i + batchSize + if end > len(documents) { + end = len(documents) + } + + batch := documents[i:end] + err := store.Index(ctx, batch) + if err != nil { + return err + } + + fmt.Printf("Indexed batch %d\n", (i/batchSize)+1) + } + return nil + } + + // Usage + largeDocumentSet := []string{ + // ... 
hundreds of documents + } + + err := batchIndex(ctx, localStore, largeDocumentSet, 20) + if err != nil { + log.Fatal(err) + } + ``` + + + ```python + # Batch indexing for large document sets + async def batch_index(store_name: str, documents: list[str], batch_size: int = 20): + for i in range(0, len(documents), batch_size): + batch = documents[i:i + batch_size] + genkit_docs = [Document.from_text(text=doc) for doc in batch] + + await DevLocalVectorStore.index(store_name, genkit_docs) + print(f"Indexed batch {(i // batch_size) + 1}") + + # Large document set + large_document_set = [ + # ... hundreds of documents + ] + + await batch_index('my_vectorstore', large_document_set, 20) + ``` + + + +## Best Practices + +### Development Workflow + +1. **Start Simple**: Use the dev local vector store for initial prototyping +2. **Test Locally**: Validate your RAG pipeline before moving to production +3. **Iterate Quickly**: Make changes and test without external dependencies +4. **Document Structure**: Establish consistent document formats early + +### Performance Considerations + +1. **Document Size**: Keep documents reasonably sized (500-1000 tokens) +2. **Index Size**: Monitor file size for large document sets +3. **Search Limits**: Use appropriate limits for search results +4. **Batch Processing**: Use batch operations for large datasets + +### Migration to Production + + + + ```ts + // Environment-based vector store selection + const isProduction = process.env.NODE_ENV === 'production'; + + const vectorStore = isProduction + ? new PineconeVectorStore({ + apiKey: process.env.PINECONE_API_KEY, + indexName: 'production-index', + }) + : new DevLocalVectorStore( + './dev-vector-store.json', + googleAI.embedder('text-embedding-004') + ); + + // Same interface for both stores + await vectorStore.index(documents); + const results = await vectorStore.search(query, 5); + ``` + + + ```go + // Environment-based store selection + var store VectorStore + + if os.Getenv("ENVIRONMENT") == "production" { + store = NewPineconeVectorStore(os.Getenv("PINECONE_API_KEY"), "production-index") + } else { + store = NewDevLocalVectorStore("./dev-vector-store.json", embedder) + } + + // Same interface for both stores + err := store.Index(ctx, documents) + results, err := store.Search(ctx, query, 5) + ``` + + + ```python + import os + + # Environment-based configuration + if os.getenv("ENVIRONMENT") == "production": + ai = Genkit( + plugins=[ + GoogleGenAI(), + PineconeVectorStore( + name='production_store', + api_key=os.getenv("PINECONE_API_KEY"), + index_name='production-index', + embedder='googleai/text-embedding-004', + ), + ], + ) + store_name = 'production_store' + else: + ai = Genkit( + plugins=[ + GoogleGenAI(), + DevLocalVectorStore( + name='dev_store', + embedder='googleai/text-embedding-004', + ), + ], + ) + store_name = 'dev_store' + + # Same interface for both environments + query_doc = Document.from_text("search query") + results = await ai.retrieve(query=query_doc, retriever=store_name) + ``` + + + +## Limitations + +1. **Not for Production**: Designed for development and testing only +2. **No Persistence**: Data is stored in local files +3. **Single Machine**: Cannot be shared across multiple instances +4. **Limited Scalability**: Performance degrades with very large datasets +5. 
**No Advanced Features**: Lacks filtering, metadata search, and other advanced capabilities + +## Next Steps + +- Start with the Dev Local Vector Store for prototyping +- Learn about [RAG implementation](/unified-docs/rag) for building complete workflows +- Explore production vector databases like [Pinecone](/unified-docs/vector-databases/pinecone) or [ChromaDB](/unified-docs/vector-databases/chromadb) +- See [deployment guides](/unified-docs/deployment) for moving to production diff --git a/src/sidebar.ts b/src/sidebar.ts index 934396da..950b268f 100644 --- a/src/sidebar.ts +++ b/src/sidebar.ts @@ -13,16 +13,6 @@ const JS_SIDEBAR = [ label: "Summarize YouTube videos", slug: "docs/tutorials/tutorial-summarize-youtube-videos", }, - { - label: "Agentic barista Angular app", - link: "https://developers.google.com/solutions/learn/agentic-barista", - attrs: { - "data-external": true, - target: "_blank", - class: "external-icon", - rel: "noopener", - }, - }, ], }, { @@ -138,7 +128,6 @@ const JS_SIDEBAR = [ { label: "Writing plugins", items: [ - // NOTE: Deployment links were incorrectly placed here before, removed them. { label: "Overview", slug: "docs/plugin-authoring" }, { label: "Writing an Evaluator Plugin", @@ -146,16 +135,6 @@ const JS_SIDEBAR = [ }, ], }, - // { - // label: "Migration Guides", - // items: [ - // // Added 0.9->1.0 link to main Genkit section previously - // { - // label: "Migrate from 0.5 to 0.9", - // slug: "docs/migrating-from-0.5", - // }, - // ], - // }, { label: "Community", items: [{ label: "Connect with us", slug: "docs/feedback" }], @@ -171,7 +150,6 @@ const JS_SIDEBAR = [ target: "_blank", class: "external-icon", rel: "noopener", - // style: "font-weight: 600; font-size: var(--sl-text-base); color: var(--sl-color-white);", }, }, { label: "API stability channels", slug: "docs/api-stability" }, @@ -251,6 +229,7 @@ const GO_SIDEBAR = [ ], }, ]; + const PYTHON_SIDEBAR = [ { label: "Get started", slug: "python/docs/get-started" }, { label: "Deploy with Cloud Run", slug: "python/docs/cloud-run" }, @@ -314,6 +293,7 @@ const UNIFIED_SIDEBAR = [ { label: "Managing prompts with Dotprompt", slug: "unified-docs/dotprompt" }, { label: "Creating persistent chat sessions", slug: "unified-docs/chat-sessions" }, { label: "Tool calling", slug: "unified-docs/tool-calling" }, + { label: "Model Context Protocol (MCP)", slug: "unified-docs/model-context-protocol" }, { label: "Pause generation using interrupts", slug: "unified-docs/interrupts" }, { label: "Retrieval-augmented generation (RAG)", slug: "unified-docs/rag" }, { label: "Building multi-agent systems", slug: "unified-docs/multi-agent-systems" }, @@ -332,12 +312,12 @@ const UNIFIED_SIDEBAR = [ { label: "xAI (Grok)", slug: "unified-docs/plugins/xai" }, { label: "DeepSeek", slug: "unified-docs/plugins/deepseek" }, { label: "Ollama", slug: "unified-docs/plugins/ollama" }, - { label: "Model Context Protocol (MCP)", slug: "unified-docs/plugins/mcp" }, ], }, { label: "Vector Databases", items: [ + { label: "Dev Local Vector Store", slug: "unified-docs/vector-databases/dev-local-vectorstore" }, { label: "Pinecone", slug: "unified-docs/vector-databases/pinecone" }, { label: "ChromaDB", slug: "unified-docs/vector-databases/chromadb" }, { label: "pgvector", slug: "unified-docs/vector-databases/pgvector" }, @@ -345,6 +325,7 @@ const UNIFIED_SIDEBAR = [ { label: "Astra DB", slug: "unified-docs/vector-databases/astra-db" }, { label: "Neo4j", slug: "unified-docs/vector-databases/neo4j" }, { label: "Cloud SQL PostgreSQL", slug: 
"unified-docs/vector-databases/cloud-sql-postgresql" }, + { label: "Cloud Firestore", slug: "unified-docs/vector-databases/cloud-firestore" }, ], }, { @@ -354,6 +335,17 @@ const UNIFIED_SIDEBAR = [ { label: "Next.js", slug: "unified-docs/frameworks/nextjs" }, ], }, + { + label: "Deployment", + collapsed: true, + items: [ + { label: "Overview", slug: "unified-docs/deployment" }, + { label: "Firebase", slug: "unified-docs/deployment/firebase" }, + { label: "Cloud Run", slug: "unified-docs/deployment/cloud-run" }, + { label: "Any Platform", slug: "unified-docs/deployment/any-platform" }, + { label: "Authorization & Security", slug: "unified-docs/deployment/authorization" }, + ], + }, { label: "Writing Plugins", items: [ @@ -363,8 +355,11 @@ const UNIFIED_SIDEBAR = [ }, { label: "Advanced Topics", + collapsed: true, items: [ - { label: "Deployment Guide", slug: "unified-docs/deployment" }, + { label: "Observability and monitoring", slug: "unified-docs/observability-monitoring" }, + { label: "Error handling", slug: "unified-docs/error-handling" }, + { label: "Multi-agent systems", slug: "unified-docs/multi-agent-systems" }, ], }, ]; From 45c13b9c68189e8b6f509d1ff8f937392a1e8492 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cchrisraygill=E2=80=9D?= Date: Thu, 7 Aug 2025 21:46:30 -0400 Subject: [PATCH 5/9] Make Deployment sidebar section expanded by default --- src/sidebar.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/sidebar.ts b/src/sidebar.ts index 950b268f..d1a01cae 100644 --- a/src/sidebar.ts +++ b/src/sidebar.ts @@ -337,7 +337,6 @@ const UNIFIED_SIDEBAR = [ }, { label: "Deployment", - collapsed: true, items: [ { label: "Overview", slug: "unified-docs/deployment" }, { label: "Firebase", slug: "unified-docs/deployment/firebase" }, From 232a821b7c3d1d90cf99d704a83aad323f0d7b58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cchrisraygill=E2=80=9D?= Date: Fri, 8 Aug 2025 13:09:46 -0400 Subject: [PATCH 6/9] Restructure observability docs and remove broken links - Split observability-monitoring.mdx into dedicated sections - Add new observability docs: overview, authentication, advanced-configuration, troubleshooting - Remove broken links to non-existent framework docs (Gin, Flask, FastAPI) - Update sidebar navigation to reflect new structure --- .../unified-docs/deployment/cloud-run.mdx | 1 - .../docs/unified-docs/deployment/firebase.mdx | 1 - .../docs/unified-docs/frameworks/express.mdx | 6 +- .../docs/unified-docs/frameworks/nextjs.mdx | 6 +- .../unified-docs/observability-monitoring.mdx | 784 ++++++------ .../observability/advanced-configuration.mdx | 1106 +++++++++++++++++ .../observability/authentication.mdx | 442 +++++++ .../unified-docs/observability/overview.mdx | 242 ++++ .../observability/troubleshooting.mdx | 764 ++++++++++++ .../unified-docs/plugin-authoring/models.mdx | 6 +- .../plugin-authoring/overview.mdx | 6 +- src/sidebar.ts | 14 +- 12 files changed, 2981 insertions(+), 397 deletions(-) create mode 100644 src/content/docs/unified-docs/observability/advanced-configuration.mdx create mode 100644 src/content/docs/unified-docs/observability/authentication.mdx create mode 100644 src/content/docs/unified-docs/observability/overview.mdx create mode 100644 src/content/docs/unified-docs/observability/troubleshooting.mdx diff --git a/src/content/docs/unified-docs/deployment/cloud-run.mdx b/src/content/docs/unified-docs/deployment/cloud-run.mdx index a816b220..4feef2d3 100644 --- a/src/content/docs/unified-docs/deployment/cloud-run.mdx +++ 
b/src/content/docs/unified-docs/deployment/cloud-run.mdx @@ -580,5 +580,4 @@ gcloud run deploy genkit-app \ - Learn about [any platform deployment](/unified-docs/deployment/any-platform) for maximum flexibility - Explore [authorization patterns](/unified-docs/deployment/authorization) for advanced security -- See [client integration](/unified-docs/deployment/client-access) for building frontend applications - Check out [monitoring and observability](/unified-docs/observability-monitoring) for production insights diff --git a/src/content/docs/unified-docs/deployment/firebase.mdx b/src/content/docs/unified-docs/deployment/firebase.mdx index 2cc351ac..8021bb93 100644 --- a/src/content/docs/unified-docs/deployment/firebase.mdx +++ b/src/content/docs/unified-docs/deployment/firebase.mdx @@ -945,5 +945,4 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo - Learn about [Cloud Run deployment](/unified-docs/deployment/cloud-run) for more flexible hosting - Explore [authorization patterns](/unified-docs/deployment/authorization) for advanced security -- See [client integration](/unified-docs/deployment/client-access) for building frontend applications - Check out [monitoring and observability](/unified-docs/observability-monitoring) for production insights diff --git a/src/content/docs/unified-docs/frameworks/express.mdx b/src/content/docs/unified-docs/frameworks/express.mdx index 9de7a5ad..fd1c9d2a 100644 --- a/src/content/docs/unified-docs/frameworks/express.mdx +++ b/src/content/docs/unified-docs/frameworks/express.mdx @@ -12,8 +12,8 @@ The Express.js integration allows you to expose Genkit flows and actions as REST :::note[Framework Availability] Express.js integration is primarily available for JavaScript/Node.js. For other languages, see equivalent frameworks: -- **Go**: [Gin](/unified-docs/frameworks/gin) or standard `net/http` -- **Python**: [Flask](/unified-docs/frameworks/flask) or [FastAPI](/unified-docs/frameworks/fastapi) +- **Go**: Gin or standard `net/http` +- **Python**: Flask or FastAPI ::: ## Installation and Setup @@ -966,5 +966,3 @@ Express.js integration is primarily available for JavaScript/Node.js. For other - See [deployment guides](/unified-docs/deployment) for production deployment strategies - Check out other framework integrations: - [Next.js](/unified-docs/frameworks/nextjs) for React applications - - [Gin](/unified-docs/frameworks/gin) for Go applications - - [Flask](/unified-docs/frameworks/flask) for Python applications diff --git a/src/content/docs/unified-docs/frameworks/nextjs.mdx b/src/content/docs/unified-docs/frameworks/nextjs.mdx index 23f1e2b0..e596ee17 100644 --- a/src/content/docs/unified-docs/frameworks/nextjs.mdx +++ b/src/content/docs/unified-docs/frameworks/nextjs.mdx @@ -12,8 +12,8 @@ The Next.js integration provides a seamless way to build full-stack AI applicati :::note[Framework Availability] Next.js integration is primarily available for JavaScript/Node.js. For other languages, see equivalent frameworks: -- **Go**: [Gin](/unified-docs/frameworks/gin) with React frontend -- **Python**: [FastAPI](/unified-docs/frameworks/fastapi) with React frontend +- **Go**: Gin with React frontend +- **Python**: FastAPI with React frontend ::: ## Installation and Setup @@ -1139,5 +1139,3 @@ Next.js integration is primarily available for JavaScript/Node.js. 
For other lan - See [deployment guides](/unified-docs/deployment) for production deployment strategies - Check out other framework integrations: - [Express.js](/unified-docs/frameworks/express) for API-first applications - - [Gin](/unified-docs/frameworks/gin) for Go applications - - [Flask](/unified-docs/frameworks/flask) for Python applications diff --git a/src/content/docs/unified-docs/observability-monitoring.mdx b/src/content/docs/unified-docs/observability-monitoring.mdx index f4585667..880e8373 100644 --- a/src/content/docs/unified-docs/observability-monitoring.mdx +++ b/src/content/docs/unified-docs/observability-monitoring.mdx @@ -1,529 +1,565 @@ --- -title: Observability and monitoring -description: Learn about Genkit's observability features, including tracing, metrics collection, logging, and monitoring capabilities across JavaScript, Go, and Python. +title: Observability and Monitoring +description: Learn how to monitor and observe your Genkit AI workflows across JavaScript, Go, and Python, including local development tools, production monitoring, and OpenTelemetry integration. --- import { Tabs, TabItem } from '@astrojs/starlight/components'; -Genkit provides robust observability features to help you understand and debug your AI applications. These capabilities include tracing, metrics collection, logging, and monitoring, with different levels of support across languages. +Genkit provides comprehensive observability features to help you understand, debug, and optimize your AI workflows. Whether you're developing locally or running in production, Genkit offers the tools you need to monitor performance, trace execution, and troubleshoot issues. -## Availability and Approach +## Overview + +Genkit's observability stack includes: + +- **Local Development Tools**: Built-in tracing and debugging via the Developer UI +- **Production Monitoring**: Firebase Genkit Monitoring dashboard for deployed applications +- **OpenTelemetry Integration**: Export telemetry data to any observability platform +- **Centralized Logging**: Structured logging with automatic export capabilities +- **Metrics Collection**: Performance metrics, error rates, and usage statistics + +## Local Development Observability + +### Developer UI Tracing + +During development, Genkit automatically collects traces and provides detailed debugging capabilities through the Developer UI: - JavaScript provides comprehensive observability features including: - - Automatic OpenTelemetry tracing and metrics - - Built-in Developer UI for trace inspection - - Centralized logging system with configurable levels - - Firebase Genkit Monitoring integration - - Export to custom observability tools + The Developer UI is automatically available when you run: + + ```bash + npx genkit start + ``` + + Features include: + - **Step-by-step trace inspection**: See every step of your flow execution + - **Input/output logging**: Examine data flowing through each step + - **Performance metrics**: View latency and execution statistics + - **Error debugging**: Detailed error information and stack traces + - **Flow testing**: Run and test flows directly from the UI - Go provides core observability features including: - - Full OpenTelemetry instrumentation - - Developer UI trace inspection - - Google Cloud Operations Suite integration - - Trace store for development debugging - - Custom telemetry export capabilities + The trace store feature is enabled automatically in development environments: + + ```bash + genkit start + # or + genkit flow:run + 
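# Tip: pass a flow name, e.g. genkit flow:run myFlow; the resulting trace
+# can then be opened step-by-step in the Developer UI
+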
``` + + The Developer UI provides: + - **Flow execution traces**: Complete visibility into flow steps + - **Input/output inspection**: Debug data transformations + - **Performance analysis**: Identify bottlenecks and optimization opportunities + - **Interactive testing**: Test flows with different inputs - Python has basic observability features: - - Limited OpenTelemetry support - - Basic logging capabilities - - Framework-specific monitoring (e.g., Flask integration) - - Manual instrumentation may be required + Development observability is built into the Genkit runtime: + + ```bash + # Development server with tracing + python -m genkit start + ``` + + Available features: + - **Automatic trace collection**: No configuration required + - **Real-time debugging**: Inspect flows as they execute + - **Data flow visualization**: See how data moves through your workflow + - **Error analysis**: Detailed error reporting and debugging -## Automatic tracing and metrics +### Local Logging + +Genkit provides a centralized logging system that integrates with the observability stack: - Genkit automatically collects traces and metrics without requiring explicit configuration, allowing you to observe and debug your Genkit code's behavior in the Developer UI. Genkit stores these traces, enabling you to analyze your Genkit flows step-by-step with detailed input/output logging and statistics. - - ```typescript - import { genkit } from 'genkit'; - import { googleAI } from '@genkit-ai/googleai'; + ```ts + import { logger } from 'genkit/logging'; - const ai = genkit({ - plugins: [googleAI()], - // Tracing is automatically enabled - }); + // Configure log level + logger.setLogLevel('debug'); - // All flows, tools, and generation calls are automatically traced - const myFlow = ai.defineFlow({ - name: 'myFlow', - inputSchema: z.string(), - outputSchema: z.string(), - }, async (input) => { - // This entire flow execution will be traced - const result = await ai.generate({ - prompt: `Process this: ${input}`, - }); - return result.text; - }); + // Use in your flows + export const myFlow = ai.defineFlow( + { name: 'myFlow' }, + async (input) => { + logger.info('Flow started', { input }); + + try { + const result = await processData(input); + logger.info('Flow completed successfully', { result }); + return result; + } catch (error) { + logger.error('Flow failed', { error: error.message }); + throw error; + } + } + ); ``` - - In production, Genkit can export traces and metrics to Firebase Genkit Monitoring for further analysis. - Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/) and provides hooks to export telemetry data. The trace store feature lets you inspect your traces for your flow runs in the Genkit Developer UI. 
- ```go import ( "context" + "log/slog" "github.com/firebase/genkit/go/genkit" - "github.com/firebase/genkit/go/plugins/googlecloud" ) - func main() { - ctx := context.Background() + func myFlow(ctx context.Context, input string) (string, error) { + slog.InfoContext(ctx, "Flow started", "input", input) - // Initialize with Google Cloud plugin for telemetry export - g, err := genkit.Init(ctx, - genkit.WithPlugins(&googlecloud.GoogleCloud{}), - ) + result, err := processData(input) if err != nil { - panic(err) + slog.ErrorContext(ctx, "Flow failed", "error", err) + return "", err } - - // All flows and generation calls are automatically traced - flow := genkit.DefineFlow(g, "myFlow", - func(ctx context.Context, input string) (string, error) { - // This entire flow execution will be traced - resp, err := genkit.Generate(ctx, g, - ai.WithPrompt("Process this: " + input), - ) - if err != nil { - return "", err - } - return resp.Text(), nil - }, - ) + + slog.InfoContext(ctx, "Flow completed", "result", result) + return result, nil } ``` - - This feature is enabled whenever you run a Genkit flow in a dev environment (such as when using `genkit start` or `genkit flow:run`). - Python has limited built-in observability features. You may need to implement custom tracing and monitoring: - ```python import logging from genkit.ai import Genkit - from genkit.plugins.google_genai import GoogleGenai - # Configure basic logging + # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) - ai = Genkit(plugins=[GoogleGenai()]) + ai = Genkit() @ai.flow() - async def my_flow(input_text: str, ctx): - # Add manual logging for observability - logger.info(f"Processing input: {input_text}") + async def my_flow(input: str) -> str: + logger.info(f"Flow started with input: {input}") try: - result = await ai.generate( - prompt=f"Process this: {input_text}", - ) - logger.info("Generation completed successfully") - return result.text - except Exception as e: - logger.error(f"Generation failed: {e}") + result = await process_data(input) + logger.info(f"Flow completed successfully: {result}") + return result + except Exception as error: + logger.error(f"Flow failed: {error}") raise ``` - - For more advanced observability, consider integrating with external monitoring tools or using framework-specific monitoring solutions. -## Logging +## Production Monitoring + +### Firebase Genkit Monitoring + +For production deployments, Genkit integrates with Firebase to provide comprehensive monitoring through the Genkit Monitoring dashboard. + +#### Setup and Configuration - Genkit provides a centralized logging system that you can configure using the logging module. One advantage of using the Genkit-provided logger is that it automatically exports logs to Genkit Monitoring when the Firebase Telemetry plugin is enabled. + **1. Install the Firebase plugin:** - ```typescript - import { logger } from 'genkit/logging'; + ```bash + npm install @genkit-ai/firebase + ``` - // Set the desired log level - logger.setLogLevel('debug'); + **2. 
Environment-based configuration:** - // Use the logger in your flows - const myFlow = ai.defineFlow({ - name: 'myFlow', - inputSchema: z.string(), - outputSchema: z.string(), - }, async (input) => { - logger.info('Starting flow execution', { input }); - - try { - const result = await ai.generate({ - prompt: `Process this: ${input}`, - }); - - logger.info('Flow completed successfully', { - inputLength: input.length, - outputLength: result.text.length - }); - - return result.text; - } catch (error) { - logger.error('Flow execution failed', { error, input }); - throw error; - } - }); + ```bash + export ENABLE_FIREBASE_MONITORING=true ``` - Available log levels: `debug`, `info`, `warn`, `error` + **3. Programmatic configuration:** + + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + + // Basic setup + enableFirebaseTelemetry(); + + // Advanced configuration + enableFirebaseTelemetry({ + forceDevExport: false, + metricExportIntervalMillis: 300_000, // 5 minutes + disableLoggingInputAndOutput: false, + disableMetrics: false, + disableTraces: false, + }); + ``` - In Go, you can use the standard logging package or structured logging libraries: - - ```go - import ( - "log" - "context" - "github.com/firebase/genkit/go/genkit" - ) + **1. Install the Google Cloud plugin:** - func myFlow(ctx context.Context, input string) (string, error) { - log.Printf("Starting flow execution with input: %s", input) - - resp, err := genkit.Generate(ctx, g, - ai.WithPrompt("Process this: " + input), - ) - if err != nil { - log.Printf("Flow execution failed: %v", err) - return "", err - } - - log.Printf("Flow completed successfully, output length: %d", len(resp.Text())) - return resp.Text(), nil - } + ```bash + go get github.com/firebase/genkit/go/plugins/googlecloud ``` - For structured logging, consider using libraries like `logrus` or `zap`: + **2. Configure telemetry export:** ```go import ( - "github.com/sirupsen/logrus" + "github.com/firebase/genkit/go/plugins/googlecloud" ) - func myFlowWithStructuredLogging(ctx context.Context, input string) (string, error) { - logrus.WithFields(logrus.Fields{ - "input_length": len(input), - "flow_name": "myFlow", - }).Info("Starting flow execution") - - // ... flow logic + func main() { + ctx := context.Background() - logrus.WithFields(logrus.Fields{ - "output_length": len(result), - "success": true, - }).Info("Flow completed") + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{}), + ) + if err != nil { + log.Fatal(err) + } - return result, nil + // Telemetry is automatically configured } ``` - Use Python's standard logging module or integrate with your preferred logging framework: + **1. Install monitoring dependencies:** - ```python - import logging - import structlog + ```bash + pip install genkit[monitoring] + ``` - # Standard logging - logger = logging.getLogger(__name__) + **2. 
Configure telemetry:** - @ai.flow() - async def my_flow(input_text: str, ctx): - logger.info("Starting flow execution", extra={ - "input_length": len(input_text), - "flow_name": "my_flow" - }) - - try: - result = await ai.generate( - prompt=f"Process this: {input_text}", - ) - - logger.info("Flow completed successfully", extra={ - "input_length": len(input_text), - "output_length": len(result.text) - }) - - return result.text - except Exception as e: - logger.error("Flow execution failed", extra={ - "error": str(e), - "input_length": len(input_text) - }) - raise + ```python + from genkit.ai import Genkit + from genkit.monitoring import enable_firebase_monitoring - # Or use structured logging with structlog - struct_logger = structlog.get_logger() + # Enable monitoring + enable_firebase_monitoring() - @ai.flow() - async def my_structured_flow(input_text: str, ctx): - struct_logger.info("flow_started", - input_length=len(input_text), - flow_name="my_structured_flow") - # ... flow logic + ai = Genkit() ``` -## Developer UI integration +#### Required Google Cloud APIs - - - The Genkit Developer UI provides detailed trace viewing and debugging capabilities. When you run your application with `genkit start`, you can: +Enable these APIs in your Google Cloud project: - - View detailed traces of flow executions - - Inspect input/output at each step - - Analyze performance metrics - - Debug errors with full stack traces - - Monitor token usage and costs +- [Cloud Logging API](https://console.cloud.google.com/apis/library/logging.googleapis.com) +- [Cloud Trace API](https://console.cloud.google.com/apis/library/cloudtrace.googleapis.com) +- [Cloud Monitoring API](https://console.cloud.google.com/apis/library/monitoring.googleapis.com) - ```bash - # Start your app with the Developer UI - genkit start -- npm run dev +#### IAM Permissions - # The Developer UI will be available at http://localhost:4000 - ``` +Grant these roles to your service account: - All traces are automatically captured and displayed in the UI without additional configuration. - - - The Developer UI works the same way for Go applications. 
When you run your application in development mode, traces are automatically captured: +- **Monitoring Metric Writer** (`roles/monitoring.metricWriter`) +- **Cloud Trace Agent** (`roles/cloudtrace.agent`) +- **Logs Writer** (`roles/logging.logWriter`) - ```bash - # Start your Go app with the Developer UI - genkit start -- go run main.go +### Monitoring Dashboard Features - # The Developer UI will be available at http://localhost:4000 - ``` +The [Genkit Monitoring dashboard](https://console.firebase.google.com/project/_/genai_monitoring) provides: - You can inspect: - - Flow execution traces - - Generation requests and responses - - Tool call details - - Performance metrics - - Error information - - - Python applications can also use the Developer UI, though with potentially limited trace details: +- **Performance Metrics**: Latency, throughput, and error rates +- **Usage Analytics**: Token consumption, model usage, and cost tracking +- **Trace Inspection**: Detailed execution traces with input/output logging +- **Error Analysis**: Error patterns, failure rates, and debugging information +- **Custom Metrics**: Application-specific metrics and KPIs - ```bash - # Start your Python app with the Developer UI - genkit start -- python main.py - - # The Developer UI will be available at http://localhost:4000 - ``` +## OpenTelemetry Integration - The level of trace detail may vary depending on the Python implementation and available instrumentation. - - +Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/), allowing you to export telemetry data to any compatible observability platform. -## Production monitoring +### Custom OpenTelemetry Configuration - For production environments, Genkit integrates with Firebase Genkit Monitoring and other observability platforms: - - ### Firebase Genkit Monitoring - - The [Genkit Monitoring](https://console.firebase.google.com/project/_/genai_monitoring) dashboard helps you understand the overall health of your Genkit features. It is useful for debugging stability and content issues that may indicate problems with your LLM prompts and/or Genkit Flows. 
- - ```typescript - import { genkit } from 'genkit'; - import { firebase } from '@genkit-ai/firebase'; - - const ai = genkit({ - plugins: [ - firebase({ - // Enables telemetry export to Firebase - telemetry: true, - }), - ], + ```ts + import { NodeSDK } from '@opentelemetry/sdk-node'; + import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node'; + import { JaegerExporter } from '@opentelemetry/exporter-jaeger'; + + // Custom OpenTelemetry setup + const sdk = new NodeSDK({ + traceExporter: new JaegerExporter({ + endpoint: 'http://localhost:14268/api/traces', + }), + instrumentations: [getNodeAutoInstrumentations()], }); - ``` - ### Custom observability platforms + sdk.start(); - You can export OpenTelemetry data to other platforms: - - ```typescript + // Your Genkit app import { genkit } from 'genkit'; - import { opentelemetry } from '@genkit-ai/opentelemetry'; - - const ai = genkit({ - plugins: [ - opentelemetry({ - endpoint: 'https://your-otel-collector.com', - headers: { - 'Authorization': 'Bearer your-token', - }, - }), - ], - }); + const ai = genkit({ /* config */ }); ``` - For production monitoring, use the Google Cloud plugin to export telemetry to Cloud's operations suite: - ```go import ( - "github.com/firebase/genkit/go/plugins/googlecloud" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/jaeger" + "go.opentelemetry.io/otel/sdk/trace" ) - func main() { - ctx := context.Background() - - g, err := genkit.Init(ctx, - genkit.WithPlugins(&googlecloud.GoogleCloud{ - // Configure telemetry export - ProjectID: "your-project-id", - }), - ) + func setupTelemetry() { + // Create Jaeger exporter + exp, err := jaeger.New(jaeger.WithCollectorEndpoint( + jaeger.WithEndpoint("http://localhost:14268/api/traces"), + )) if err != nil { - panic(err) + log.Fatal(err) } + + // Create trace provider + tp := trace.NewTracerProvider( + trace.WithBatcher(exp), + ) - // Your flows will now export telemetry to Google Cloud + otel.SetTracerProvider(tp) } ``` - - This provides: - - Distributed tracing in Cloud Trace - - Metrics in Cloud Monitoring - - Logs in Cloud Logging - - Error reporting in Cloud Error Reporting - For production monitoring in Python, you'll typically need to integrate with external monitoring solutions: - ```python - import logging from opentelemetry import trace - from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter + from opentelemetry.exporter.jaeger.thrift import JaegerExporter from opentelemetry.sdk.trace import TracerProvider from opentelemetry.sdk.trace.export import BatchSpanProcessor - # Configure OpenTelemetry for Google Cloud + # Configure OpenTelemetry trace.set_tracer_provider(TracerProvider()) tracer = trace.get_tracer(__name__) - cloud_trace_exporter = CloudTraceSpanExporter() - span_processor = BatchSpanProcessor(cloud_trace_exporter) - trace.get_tracer_provider().add_span_processor(span_processor) + jaeger_exporter = JaegerExporter( + agent_host_name="localhost", + agent_port=6831, + ) - @ai.flow() - async def monitored_flow(input_text: str, ctx): - with tracer.start_as_current_span("my_flow") as span: - span.set_attribute("input.length", len(input_text)) - - try: - result = await ai.generate( - prompt=f"Process this: {input_text}", - ) - span.set_attribute("output.length", len(result.text)) - return result.text - except Exception as e: - span.record_exception(e) - span.set_status(trace.Status(trace.StatusCode.ERROR)) - raise + span_processor = BatchSpanProcessor(jaeger_exporter) + 
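# Registering the processor exports spans in batches on a background
+# thread, keeping trace export off the request path
+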
trace.get_tracer_provider().add_span_processor(span_processor) ``` -## Best practices +### Popular Observability Platforms -### Development monitoring +Genkit's OpenTelemetry integration works with: - - - - Use the Developer UI for real-time debugging - - Set appropriate log levels for development vs production - - Monitor token usage and costs during development - - Use trace inspection to optimize flow performance - - Test error scenarios and review error traces - - - - Leverage the Developer UI for flow debugging - - Use structured logging for better searchability - - Monitor goroutine usage in concurrent flows - - Profile memory usage for long-running applications - - Test error handling and recovery scenarios - - - - Implement comprehensive logging throughout your flows - - Use async-compatible logging libraries - - Monitor memory usage and garbage collection - - Test error scenarios thoroughly - - Consider using APM tools for detailed insights - - +- **Jaeger**: Distributed tracing +- **Zipkin**: Request tracing and timing data +- **Prometheus**: Metrics collection and alerting +- **Grafana**: Visualization and dashboards +- **Datadog**: Full-stack monitoring +- **New Relic**: Application performance monitoring +- **Honeycomb**: Observability for complex systems -### Production monitoring +## Advanced Configuration + +### Sampling and Performance + +Control telemetry collection to balance observability with performance: - - Enable Firebase Genkit Monitoring for comprehensive insights - - Set up alerts for error rates and latency thresholds - - Monitor token usage and costs - - Track flow success rates and performance metrics - - Implement custom metrics for business-specific KPIs + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + import { TraceIdRatioBasedSampler } from '@opentelemetry/sdk-trace-base'; + + enableFirebaseTelemetry({ + // Sample 10% of traces + sampler: new TraceIdRatioBasedSampler(0.1), + + // Reduce export frequency for high-volume apps + metricExportIntervalMillis: 600_000, // 10 minutes + + // Disable input/output logging for sensitive data + disableLoggingInputAndOutput: true, + + // Custom auto-instrumentation + autoInstrumentationConfig: { + '@opentelemetry/instrumentation-fs': { enabled: false }, + '@opentelemetry/instrumentation-dns': { enabled: false }, + }, + }); + ``` - - Use Google Cloud Operations Suite for full observability - - Set up monitoring dashboards for key metrics - - Configure alerting for error rates and latency - - Monitor resource usage and scaling metrics - - Implement health checks and readiness probes + ```go + import ( + "go.opentelemetry.io/otel/sdk/trace" + ) + + func setupSampling() { + // Sample 10% of traces + sampler := trace.TraceIDRatioBased(0.1) + + tp := trace.NewTracerProvider( + trace.WithSampler(sampler), + // Other configuration... 
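+        // e.g. trace.WithBatcher(exporter) would attach a span exporter here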
+    )
+
+    otel.SetTracerProvider(tp)
+}
+```
+
+
+```python
+from opentelemetry.sdk.trace.sampling import TraceIdRatioBased
+
+# Configure sampling (the Python SDK class is named TraceIdRatioBased)
+sampler = TraceIdRatioBased(0.1)  # 10% sampling
+
+trace_provider = TracerProvider(sampler=sampler)
+trace.set_tracer_provider(trace_provider)
+```
+
+
+### Custom Metrics
+
+Add application-specific metrics to your observability stack:
+
+
+```ts
+import { metrics } from '@opentelemetry/api';
+
+const meter = metrics.getMeter('my-genkit-app');
+const requestCounter = meter.createCounter('genkit_requests_total');
+const responseTime = meter.createHistogram('genkit_response_time');
+
+export const myFlow = ai.defineFlow(
+  { name: 'myFlow' },
+  async (input) => {
+    const startTime = Date.now();
+    requestCounter.add(1, { flow: 'myFlow' });
+
+    try {
+      const result = await processData(input);
+      responseTime.record(Date.now() - startTime, {
+        flow: 'myFlow',
+        status: 'success',
+      });
+      return result;
+    } catch (error) {
+      responseTime.record(Date.now() - startTime, {
+        flow: 'myFlow',
+        status: 'error',
+      });
+      throw error;
+    }
+  },
+);
+```
+
+
+```go
+import (
+    "context"
+
+    "go.opentelemetry.io/otel"
+    "go.opentelemetry.io/otel/attribute"
+    "go.opentelemetry.io/otel/metric"
+)
+
+func setupMetrics(ctx context.Context) {
+    meter := otel.Meter("my-genkit-app")
+
+    requestCounter, _ := meter.Int64Counter("genkit_requests_total")
+    responseTime, _ := meter.Float64Histogram("genkit_response_time")
+
+    // Use in your flows
+    requestCounter.Add(ctx, 1, metric.WithAttributes(
+        attribute.String("flow", "myFlow"),
+    ))
+    _ = responseTime // recorded around flow execution, as in the other tabs
+}
+```
+
+
+```python
+import time
+
+from opentelemetry import metrics
+
+meter = metrics.get_meter(__name__)
+request_counter = meter.create_counter("genkit_requests_total")
+response_time = meter.create_histogram("genkit_response_time")
+
+@ai.flow()
+async def my_flow(input: str) -> str:
+    start_time = time.time()
+    request_counter.add(1, {"flow": "my_flow"})
+
+    try:
+        result = await process_data(input)
+        response_time.record(
+            time.time() - start_time,
+            {"flow": "my_flow", "status": "success"},
+        )
+        return result
+    except Exception as error:
+        response_time.record(
+            time.time() - start_time,
+            {"flow": "my_flow", "status": "error"},
+        )
+        raise
+```
+
+
+## Troubleshooting
+
+### Common Issues
+
+**Metrics not appearing in dashboard:**
+- Verify API permissions and service account roles
+- Check that required Google Cloud APIs are enabled
+- Ensure `metricExportIntervalMillis` isn't too high
+- Confirm network connectivity to Google Cloud services
+
+**High telemetry costs:**
+- Implement sampling to reduce data volume
+- Disable input/output logging for large payloads +- Increase export intervals for non-critical environments +- Use trace sampling for high-traffic applications + +**Missing traces in production:** +- Verify OpenTelemetry configuration +- Check service account permissions +- Ensure trace export is enabled +- Validate network connectivity + +### Performance Optimization + +**Reduce telemetry overhead:** +- Use appropriate sampling rates +- Disable unnecessary auto-instrumentations +- Batch exports efficiently +- Monitor telemetry export performance + +**Optimize for production:** +- Disable development-only features +- Use environment-specific configurations +- Implement circuit breakers for telemetry exports +- Monitor resource usage + +## Best Practices + +### Development + +1. **Use the Developer UI**: Take advantage of built-in tracing during development +2. **Test monitoring setup**: Verify telemetry collection before deploying +3. **Monitor resource usage**: Ensure observability doesn't impact performance +4. **Implement health checks**: Monitor the health of your monitoring system + +### Production + +1. **Implement alerting**: Set up alerts for critical metrics and errors +2. **Use dashboards**: Create custom dashboards for your specific use cases +3. **Monitor costs**: Track telemetry costs and optimize as needed +4. **Regular reviews**: Regularly review and optimize your observability setup + +### Security + +1. **Protect sensitive data**: Disable input/output logging for sensitive information +2. **Secure credentials**: Use proper IAM roles and service accounts +3. **Network security**: Ensure secure connections to observability platforms +4. **Data retention**: Configure appropriate data retention policies + +## Next Steps -- Learn about [developer tools](/unified-docs/developer-tools) for local development and debugging -- Explore [evaluation](/unified-docs/evaluation) to measure and improve your application quality -- See [creating flows](/unified-docs/creating-flows) to build observable AI workflows -- Check out [generating content](/unified-docs/generating-content) for understanding traced generation calls +- Set up [Firebase Genkit Monitoring](https://console.firebase.google.com/project/_/genai_monitoring) for your production applications +- Explore [OpenTelemetry documentation](https://opentelemetry.io/docs/) for advanced configurations +- Learn about [evaluation](/unified-docs/evaluation) to complement your monitoring strategy +- Check out [deployment guides](/unified-docs/deployment) for production-ready configurations diff --git a/src/content/docs/unified-docs/observability/advanced-configuration.mdx b/src/content/docs/unified-docs/observability/advanced-configuration.mdx new file mode 100644 index 00000000..a8df3baf --- /dev/null +++ b/src/content/docs/unified-docs/observability/advanced-configuration.mdx @@ -0,0 +1,1106 @@ +--- +title: Advanced Configuration +description: Learn advanced configuration options for Genkit observability, including sampling, performance tuning, custom metrics, and telemetry optimization across JavaScript, Go, and Python. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +This guide covers advanced configuration options for fine-tuning Genkit's observability features to optimize performance, control costs, and customize telemetry collection. 
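+
+For orientation, here is a minimal sketch that combines several of the options covered in this guide (trace sampling, metric export intervals, and input/output logging control). Treat it as a starting point to adapt, not a canonical configuration:
+
+```ts
+import { enableFirebaseTelemetry } from '@genkit-ai/firebase';
+import { TraceIdRatioBasedSampler } from '@opentelemetry/sdk-trace-base';
+
+// Sample 10% of traces, export metrics every 10 minutes,
+// and keep input/output payloads out of the logs.
+enableFirebaseTelemetry({
+  sampler: new TraceIdRatioBasedSampler(0.1),
+  metricExportIntervalMillis: 600_000,
+  disableLoggingInputAndOutput: true,
+});
+```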
+ +## Configuration Overview + +Genkit's observability system can be configured at multiple levels: + +- **Global Settings**: Apply to all telemetry collection +- **Language-Specific Options**: Platform-specific configurations +- **Runtime Controls**: Dynamic configuration changes +- **Environment Variables**: External configuration management + +## Default Configuration + +Understanding the default settings helps you make informed configuration decisions: + + + + ```ts + // Default Firebase telemetry configuration + { + autoInstrumentation: true, + autoInstrumentationConfig: { + '@opentelemetry/instrumentation-dns': { enabled: false }, + }, + disableMetrics: false, + disableTraces: false, + disableLoggingInputAndOutput: false, + forceDevExport: false, + metricExportIntervalMillis: 300_000, // 5 minutes + metricExportTimeoutMillis: 300_000, // 5 minutes + sampler: AlwaysOnSampler(), // 100% sampling + } + ``` + + + ```go + // Default configuration is handled by the Google Cloud plugin + // Telemetry is automatically configured with sensible defaults + type TelemetryConfig struct { + ProjectID string + EnableTracing bool // true + EnableMetrics bool // true + EnableLogging bool // true + SamplingRate float64 // 1.0 (100%) + ExportInterval time.Duration // 5 minutes + } + ``` + + + ```python + # Default monitoring configuration + default_config = { + 'enable_tracing': True, + 'enable_metrics': True, + 'enable_logging': True, + 'sampling_rate': 1.0, # 100% sampling + 'export_interval': 300, # 5 minutes + 'disable_input_output_logging': False, + } + ``` + + + +## Sampling Configuration + +Sampling reduces telemetry volume and costs while maintaining observability: + +### Trace Sampling + + + + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + import { + TraceIdRatioBasedSampler, + ParentBasedSampler, + AlwaysOffSampler, + AlwaysOnSampler + } from '@opentelemetry/sdk-trace-base'; + + // Sample 10% of traces + enableFirebaseTelemetry({ + sampler: new TraceIdRatioBasedSampler(0.1), + }); + + // Parent-based sampling (inherit from parent span) + enableFirebaseTelemetry({ + sampler: new ParentBasedSampler({ + root: new TraceIdRatioBasedSampler(0.1), + }), + }); + + // Environment-based sampling + const samplingRate = process.env.NODE_ENV === 'production' ? 
0.1 : 1.0;
+enableFirebaseTelemetry({
+  sampler: new TraceIdRatioBasedSampler(samplingRate),
+});
+```
+
+
+```go
+import (
+    "os"
+
+    "go.opentelemetry.io/otel"
+    "go.opentelemetry.io/otel/sdk/trace"
+)
+
+func configureSampling() {
+    // 10% sampling
+    sampler := trace.TraceIDRatioBased(0.1)
+
+    // Parent-based sampling (inherit the parent span's decision)
+    parentSampler := trace.ParentBased(
+        trace.TraceIDRatioBased(0.1),
+    )
+    _, _ = sampler, parentSampler // pick one of the strategies above
+
+    // Environment-based sampling
+    var samplingRate float64 = 1.0
+    if os.Getenv("ENVIRONMENT") == "production" {
+        samplingRate = 0.1
+    }
+
+    tp := trace.NewTracerProvider(
+        trace.WithSampler(trace.TraceIDRatioBased(samplingRate)),
+    )
+
+    otel.SetTracerProvider(tp)
+}
+```
+
+
+```python
+from opentelemetry.sdk.trace.sampling import (
+    ALWAYS_OFF,
+    ALWAYS_ON,
+    ParentBased,
+    TraceIdRatioBased,
+)
+
+# 10% sampling
+sampler = TraceIdRatioBased(0.1)
+
+# Parent-based sampling
+parent_sampler = ParentBased(
+    root=TraceIdRatioBased(0.1)
+)
+
+# Environment-based sampling
+import os
+sampling_rate = 0.1 if os.getenv('ENVIRONMENT') == 'production' else 1.0
+
+enable_firebase_monitoring(
+    sampling_rate=sampling_rate
+)
+```
+
+
+### Custom Sampling Strategies
+
+Implement custom sampling logic based on your application needs:
+
+
+```ts
+import { Sampler, SamplingResult, SamplingDecision } from '@opentelemetry/sdk-trace-base';
+
+class CustomSampler implements Sampler {
+  shouldSample(context, traceId, spanName, spanKind, attributes, links) {
+    // Sample all error traces
+    if (attributes['error'] === true) {
+      return { decision: SamplingDecision.RECORD_AND_SAMPLED };
+    }
+
+    // Sample 50% of flow executions
+    if (spanName.includes('flow:')) {
+      return Math.random() < 0.5
+        ? { decision: SamplingDecision.RECORD_AND_SAMPLED }
+        : { decision: SamplingDecision.NOT_RECORD };
+    }
+
+    // Sample 10% of everything else
+    return Math.random() < 0.1
+      ? { decision: SamplingDecision.RECORD_AND_SAMPLED }
+      : { decision: SamplingDecision.NOT_RECORD };
+  }
+
+  toString() {
+    return 'CustomSampler';
+  }
+}
+
+enableFirebaseTelemetry({
+  sampler: new CustomSampler(),
+});
+```
+
+
+```go
+import (
+    "math/rand"
+    "strings"
+
+    "go.opentelemetry.io/otel/sdk/trace"
+)
+
+type CustomSampler struct{}
+
+func (s CustomSampler) ShouldSample(p trace.SamplingParameters) trace.SamplingResult {
+    // Sample all error spans
+    for _, attr := range p.Attributes {
+        if attr.Key == "error" && attr.Value.AsBool() {
+            return trace.SamplingResult{
+                Decision: trace.RecordAndSample,
+            }
+        }
+    }
+
+    // Sample 50% of flow executions
+    if strings.Contains(p.Name, "flow:") {
+        if rand.Float64() < 0.5 {
+            return trace.SamplingResult{Decision: trace.RecordAndSample}
+        }
+        return trace.SamplingResult{Decision: trace.Drop}
+    }
+
+    // Sample 10% of everything else
+    if rand.Float64() < 0.1 {
+        return trace.SamplingResult{Decision: trace.RecordAndSample}
+    }
+    return trace.SamplingResult{Decision: trace.Drop}
+}
+
+func (s CustomSampler) Description() string {
+    return "CustomSampler"
+}
+```
+
+
+```python
+import random
+
+from opentelemetry.sdk.trace.sampling import Decision, Sampler, SamplingResult
+
+class CustomSampler(Sampler):
+    def should_sample(self, parent_context, trace_id, name, kind, attributes, links, trace_state):
+        # Sample all error traces
+        if attributes and attributes.get('error') is True:
+            return SamplingResult(Decision.RECORD_AND_SAMPLE)
+
+        # Sample 50% of flow executions
+        if 'flow:' in name:
+            decision = Decision.RECORD_AND_SAMPLE if random.random() < 0.5 else Decision.DROP
+            return SamplingResult(decision)
+
+        # Sample 10% of everything else
+        decision = Decision.RECORD_AND_SAMPLE if random.random() < 0.1 else Decision.DROP
+        return SamplingResult(decision)
+
+    def get_description(self):
+        return "CustomSampler"
+
+enable_firebase_monitoring(
+    custom_sampler=CustomSampler()
+)
+```
+
+
+## Performance Optimization
+
+### Export Intervals and Batching
+
+Optimize telemetry export for your application's performance requirements:
+
+
+```ts
+import { BatchSpanProcessor } from '@opentelemetry/sdk-trace-base';
+
+enableFirebaseTelemetry({
+  // Reduce export frequency for high-volume applications
+  metricExportIntervalMillis: 600_000, // 10 minutes
+  metricExportTimeoutMillis: 30_000, // 30 seconds
+
+  // For development/testing - faster exports
+  // metricExportIntervalMillis: 10_000, // 10 seconds
+  // metricExportTimeoutMillis: 5_000, // 5 seconds
+});
+
+// Custom batch configuration for traces
+const batchProcessor = new BatchSpanProcessor(exporter, {
+  maxExportBatchSize: 512, // Default: 512
+  exportTimeoutMillis: 30_000, // Default: 30 seconds
+  scheduledDelayMillis: 5_000, // Default: 5 seconds
+  maxQueueSize: 2048, // Default: 2048
+});
+```
+
+
+```go
+import (
+    "time"
+
+    "go.opentelemetry.io/otel"
+    "go.opentelemetry.io/otel/sdk/trace"
+)
+
+func configurePerformance(exporter trace.SpanExporter) {
+    // Configure batch span processor
+    batchProcessor := trace.NewBatchSpanProcessor(
+        exporter,
+        trace.WithBatchTimeout(30*time.Second),
+        trace.WithExportTimeout(10*time.Second),
+        trace.WithMaxExportBatchSize(512),
+        trace.WithMaxQueueSize(2048),
+    )
+
+    tp := trace.NewTracerProvider(
+        trace.WithSpanProcessor(batchProcessor),
+    )
+
+    otel.SetTracerProvider(tp)
+}
+```
+
+
+```python
+from opentelemetry.sdk.trace.export import BatchSpanProcessor
+
+# Configure batch processing
+batch_processor = BatchSpanProcessor(
+    exporter,
+    max_export_batch_size=512,
+    schedule_delay_millis=5000,
+    export_timeout_millis=30000,
+
max_queue_size=2048, + ) + + enable_firebase_monitoring( + export_interval=600, # 10 minutes + batch_processor=batch_processor + ) + ``` + + + +### Auto-Instrumentation Control + +Fine-tune automatic instrumentation to reduce overhead: + + + + ```ts + enableFirebaseTelemetry({ + autoInstrumentationConfig: { + // Disable file system instrumentation + '@opentelemetry/instrumentation-fs': { enabled: false }, + + // Disable DNS instrumentation + '@opentelemetry/instrumentation-dns': { enabled: false }, + + // Disable network instrumentation + '@opentelemetry/instrumentation-net': { enabled: false }, + + // Configure HTTP instrumentation + '@opentelemetry/instrumentation-http': { + enabled: true, + ignoreIncomingRequestHook: (req) => { + // Ignore health check requests + return req.url?.includes('/health') || false; + }, + ignoreOutgoingRequestHook: (options) => { + // Ignore internal requests + return options.hostname === 'localhost'; + }, + }, + + // Configure Express instrumentation + '@opentelemetry/instrumentation-express': { + enabled: true, + ignoreLayers: [ + // Ignore middleware layers + (layer) => layer.name === 'cors', + ], + }, + }, + }); + ``` + + + ```go + // Go auto-instrumentation is typically handled through + // manual instrumentation or specific library integrations + + import ( + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" + ) + + func configureInstrumentation() { + // Configure HTTP client instrumentation + client := &http.Client{ + Transport: otelhttp.NewTransport( + http.DefaultTransport, + otelhttp.WithFilter(func(req *http.Request) bool { + // Skip health check requests + return !strings.Contains(req.URL.Path, "/health") + }), + ), + } + + // Configure HTTP server instrumentation + handler := otelhttp.NewHandler( + myHandler, + "my-service", + otelhttp.WithFilter(func(req *http.Request) bool { + // Skip internal requests + return req.Header.Get("X-Internal") == "" + }), + ) + } + ``` + + + ```python + from opentelemetry.instrumentation.requests import RequestsInstrumentor + from opentelemetry.instrumentation.flask import FlaskInstrumentor + + # Configure requests instrumentation + RequestsInstrumentor().instrument( + excluded_urls="localhost,127.0.0.1" + ) + + # Configure Flask instrumentation + FlaskInstrumentor().instrument_app( + app, + excluded_urls="/health,/metrics" + ) + + enable_firebase_monitoring( + auto_instrumentation_config={ + 'requests': {'enabled': True}, + 'flask': {'enabled': True}, + 'sqlalchemy': {'enabled': False}, # Disable if not needed + } + ) + ``` + + + +## Data Privacy and Security + +### Input/Output Logging Control + +Protect sensitive data by controlling what gets logged: + + + + ```ts + enableFirebaseTelemetry({ + // Disable all input/output logging + disableLoggingInputAndOutput: true, + }); + + // Or use custom filtering + import { logger } from 'genkit/logging'; + + const sanitizeData = (data: any) => { + if (typeof data === 'object' && data !== null) { + const sanitized = { ...data }; + // Remove sensitive fields + delete sanitized.password; + delete sanitized.apiKey; + delete sanitized.token; + return sanitized; + } + return data; + }; + + export const secureFlow = ai.defineFlow( + { name: 'secureFlow' }, + async (input) => { + logger.info('Flow started', { input: sanitizeData(input) }); + // Process data... 
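+      // sanitizeData strips sensitive fields before anything is logged;
+      // processData is a placeholder for your application logic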
+      const result = await processData(input);
+      logger.info('Flow completed', { result: sanitizeData(result) });
+      return result;
+    }
+  );
+  ```
+
+
+```go
+import (
+    "context"
+    "log/slog"
+)
+
+func sanitizeData(data interface{}) interface{} {
+    // Implement data sanitization logic
+    if m, ok := data.(map[string]interface{}); ok {
+        sanitized := make(map[string]interface{})
+        for k, v := range m {
+            if k != "password" && k != "apiKey" && k != "token" {
+                sanitized[k] = v
+            } else {
+                sanitized[k] = "[REDACTED]"
+            }
+        }
+        return sanitized
+    }
+    return data
+}
+
+func secureFlow(ctx context.Context, input map[string]interface{}) (string, error) {
+    slog.InfoContext(ctx, "Flow started", "input", sanitizeData(input))
+
+    result, err := processData(input)
+    if err != nil {
+        return "", err
+    }
+
+    slog.InfoContext(ctx, "Flow completed", "result", sanitizeData(result))
+    return result, nil
+}
+```
+
+
+```python
+import logging
+from typing import Any, Dict
+
+logger = logging.getLogger(__name__)
+
+def sanitize_data(data: Any) -> Any:
+    """Remove sensitive information from data."""
+    if isinstance(data, dict):
+        sanitized = {}
+        for key, value in data.items():
+            if key.lower() in ['password', 'api_key', 'token', 'secret']:
+                sanitized[key] = '[REDACTED]'
+            else:
+                sanitized[key] = value
+        return sanitized
+    return data
+
+enable_firebase_monitoring(
+    disable_input_output_logging=True  # Global disable
+)
+
+@ai.flow()
+async def secure_flow(input: Dict[str, Any]) -> str:
+    logger.info(f"Flow started with input: {sanitize_data(input)}")
+
+    result = await process_data(input)
+    logger.info(f"Flow completed: {sanitize_data(result)}")
+
+    return result
+```
+
+
+
+### Attribute Filtering
+
+Filter sensitive attributes from telemetry data:
+
+
+```ts
+import { SpanProcessor, Span } from '@opentelemetry/sdk-trace-base';
+
+class AttributeFilterProcessor implements SpanProcessor {
+  private sensitiveKeys = ['password', 'apiKey', 'token', 'secret'];
+
+  onStart(span: Span): void {
+    // Filter attributes on span start
+    const attributes = span.attributes;
+    for (const key of this.sensitiveKeys) {
+      if (key in attributes) {
+        span.setAttributes({ [key]: '[REDACTED]' });
+      }
+    }
+  }
+
+  onEnd(): void {}
+  shutdown(): Promise<void> { return Promise.resolve(); }
+  forceFlush(): Promise<void> { return Promise.resolve(); }
+}
+
+// Add to tracer provider
+const tp = new TracerProvider({
+  spanProcessors: [new AttributeFilterProcessor()],
+});
+```
+
+
+```go
+import (
+    "context"
+
+    "go.opentelemetry.io/otel/attribute"
+    "go.opentelemetry.io/otel/sdk/trace"
+)
+
+type AttributeFilterProcessor struct {
+    sensitiveKeys map[string]bool
+}
+
+func NewAttributeFilterProcessor() *AttributeFilterProcessor {
+    return &AttributeFilterProcessor{
+        sensitiveKeys: map[string]bool{
+            "password": true,
+            "apiKey":   true,
+            "token":    true,
+            "secret":   true,
+        },
+    }
+}
+
+func (p *AttributeFilterProcessor) OnStart(parent context.Context, s trace.ReadWriteSpan) {
+    // Filter sensitive attributes
+    attrs := s.Attributes()
+    for _, attr := range attrs {
+        if p.sensitiveKeys[string(attr.Key)] {
+            s.SetAttributes(attribute.String(string(attr.Key), "[REDACTED]"))
+        }
+    }
+}
+
+func (p *AttributeFilterProcessor) OnEnd(s trace.ReadOnlySpan)           {}
+func (p *AttributeFilterProcessor) Shutdown(ctx context.Context) error   { return nil }
+func (p *AttributeFilterProcessor) ForceFlush(ctx context.Context) error { return nil }
+```
+
+
+```python
+from opentelemetry.sdk.trace import SpanProcessor
+from opentelemetry.trace import Span
+
+class AttributeFilterProcessor(SpanProcessor):
+    def __init__(self):
+        self.sensitive_keys = {'password', 'api_key', 'token', 'secret'}
+
+    def on_start(self, span: Span, parent_context=None):
+        # Overwrite sensitive attributes through the public API rather
+        # than mutating the span's private internals
+        for key in list(span.attributes or {}):
+            if key.lower() in self.sensitive_keys:
+                span.set_attribute(key, '[REDACTED]')
+
+    def on_end(self, span: Span):
+        pass
+
+    def shutdown(self):
+        pass
+
+    def force_flush(self, timeout_millis=None):
+        pass
+
+# Add to trace provider
+trace_provider.add_span_processor(AttributeFilterProcessor())
+```
+
+
+
+## Custom Metrics
+
+Add application-specific metrics to enhance observability:
+
+
+```ts
+import { metrics } from '@opentelemetry/api';
+
+// Create custom meters and instruments
+const meter = metrics.getMeter('genkit-app', '1.0.0');
+
+// Counters for tracking events
+const flowExecutions = meter.createCounter('genkit_flow_executions_total', {
+  description: 'Total number of flow executions',
+});
+
+const tokenUsage = meter.createCounter('genkit_tokens_consumed_total', {
+  description: 'Total tokens consumed by model calls',
+});
+
+// Histograms for measuring distributions
+const flowDuration = meter.createHistogram('genkit_flow_duration_ms', {
+  description: 'Flow execution duration in milliseconds',
+  advice: { explicitBucketBoundaries: [10, 50, 100, 500, 1000, 5000, 10000] },
+});
+
+const modelLatency = meter.createHistogram('genkit_model_latency_ms', {
+  description: 'Model call latency in milliseconds',
+});
+
+// Gauges for current values
+const activeFlows = meter.createUpDownCounter('genkit_active_flows', {
+  description: 'Number of currently active flows',
+});
+
+// Use in your flows
+export const instrumentedFlow = ai.defineFlow(
+  { name: 'instrumentedFlow' },
+  async (input) => {
+    const startTime = Date.now();
+
+    flowExecutions.add(1, { flow: 'instrumentedFlow', version: '1.0' });
+    activeFlows.add(1);
+
+    try {
+      const result = await ai.generate({
+        model: 'gemini-1.5-flash',
+        prompt: `Process: ${input}`,
+      });
+
+      // Track token usage if available
+      if (result.usage) {
+        tokenUsage.add(result.usage.totalTokens, {
+          model: 'gemini-1.5-flash',
+          flow: 'instrumentedFlow',
+        });
+      }
+
+      const duration = Date.now() - startTime;
+      flowDuration.record(duration, {
+        flow: 'instrumentedFlow',
+        status: 'success',
+      });
+
+      return result.text;
+    } catch (error) {
+      const duration = Date.now() - startTime;
+      flowDuration.record(duration, {
+        flow: 'instrumentedFlow',
+        status: 'error',
+      });
+      throw error;
+    } finally {
+      activeFlows.add(-1);
+    }
+  }
+);
+```
+
+
+```go
+import (
+    "context"
+    "time"
+
+    "go.opentelemetry.io/otel"
+    "go.opentelemetry.io/otel/attribute"
+    "go.opentelemetry.io/otel/metric"
+)
+
+func setupCustomMetrics() {
+    meter := otel.Meter("genkit-app")
+
+    // Create instruments
+    flowExecutions, _ := meter.Int64Counter("genkit_flow_executions_total")
+    tokenUsage, _ := meter.Int64Counter("genkit_tokens_consumed_total")
+    flowDuration, _ := meter.Float64Histogram("genkit_flow_duration_ms")
+    activeFlows, _ := meter.Int64UpDownCounter("genkit_active_flows")
+    _ = tokenUsage // record token usage where model responses expose it
+
+    // Use in flows
+    instrumentedFlow := genkit.DefineFlow(g, "instrumentedFlow",
+        func(ctx context.Context, input string) (string, error) {
+            startTime := time.Now()
+
+            flowExecutions.Add(ctx, 1, metric.WithAttributes(
+                attribute.String("flow", "instrumentedFlow"),
+                attribute.String("version", "1.0"),
+            ))
+            activeFlows.Add(ctx, 1)
+
+            defer func() {
+                duration := float64(time.Since(startTime).Milliseconds())
+                flowDuration.Record(ctx,
duration, metric.WithAttributes( + attribute.String("flow", "instrumentedFlow"), + )) + activeFlows.Add(ctx, -1) + }() + + // Process the flow + result, err := processData(input) + if err != nil { + return "", err + } + + return result, nil + }) + } + ``` + + + ```python + from opentelemetry import metrics + import time + + # Create custom meters and instruments + meter = metrics.get_meter("genkit-app", "1.0.0") + + flow_executions = meter.create_counter( + "genkit_flow_executions_total", + description="Total number of flow executions" + ) + + token_usage = meter.create_counter( + "genkit_tokens_consumed_total", + description="Total tokens consumed by model calls" + ) + + flow_duration = meter.create_histogram( + "genkit_flow_duration_ms", + description="Flow execution duration in milliseconds" + ) + + active_flows = meter.create_up_down_counter( + "genkit_active_flows", + description="Number of currently active flows" + ) + + @ai.flow() + async def instrumented_flow(input: str) -> str: + start_time = time.time() + + flow_executions.add(1, {"flow": "instrumented_flow", "version": "1.0"}) + active_flows.add(1) + + try: + result = await ai.generate( + model='gemini-1.5-flash', + prompt=f'Process: {input}', + ) + + # Track token usage if available + if hasattr(result, 'usage') and result.usage: + token_usage.add( + result.usage.total_tokens, + {"model": "gemini-1.5-flash", "flow": "instrumented_flow"} + ) + + duration = (time.time() - start_time) * 1000 # Convert to ms + flow_duration.record(duration, { + "flow": "instrumented_flow", + "status": "success" + }) + + return result.text + + except Exception as error: + duration = (time.time() - start_time) * 1000 + flow_duration.record(duration, { + "flow": "instrumented_flow", + "status": "error" + }) + raise + finally: + active_flows.add(-1) + ``` + + + +## Environment-Specific Configuration + +Configure observability differently for each environment: + + + + ```ts + const environment = process.env.NODE_ENV || 'development'; + + const getObservabilityConfig = () => { + switch (environment) { + case 'production': + return { + sampler: new TraceIdRatioBasedSampler(0.1), // 10% sampling + metricExportIntervalMillis: 300_000, // 5 minutes + disableLoggingInputAndOutput: true, + autoInstrumentationConfig: { + '@opentelemetry/instrumentation-fs': { enabled: false }, + '@opentelemetry/instrumentation-dns': { enabled: false }, + }, + }; + + case 'staging': + return { + sampler: new TraceIdRatioBasedSampler(0.5), // 50% sampling + metricExportIntervalMillis: 60_000, // 1 minute + disableLoggingInputAndOutput: false, + }; + + case 'development': + default: + return { + forceDevExport: true, + sampler: new AlwaysOnSampler(), // 100% sampling + metricExportIntervalMillis: 10_000, // 10 seconds + disableLoggingInputAndOutput: false, + }; + } + }; + + enableFirebaseTelemetry(getObservabilityConfig()); + ``` + + + ```go + func getObservabilityConfig() TelemetryConfig { + env := os.Getenv("ENVIRONMENT") + + switch env { + case "production": + return TelemetryConfig{ + SamplingRate: 0.1, // 10% sampling + ExportInterval: 5 * time.Minute, + EnableInputOutputLogging: false, + } + case "staging": + return TelemetryConfig{ + SamplingRate: 0.5, // 50% sampling + ExportInterval: 1 * time.Minute, + EnableInputOutputLogging: true, + } + default: // development + return TelemetryConfig{ + SamplingRate: 1.0, // 100% sampling + ExportInterval: 10 * time.Second, + EnableInputOutputLogging: true, + ForceDevExport: true, + } + } + } + ``` + + + ```python + import os + + def 
get_observability_config():
+    environment = os.getenv('ENVIRONMENT', 'development')
+
+    if environment == 'production':
+        return {
+            'sampling_rate': 0.1,  # 10% sampling
+            'export_interval': 300,  # 5 minutes
+            'disable_input_output_logging': True,
+        }
+    elif environment == 'staging':
+        return {
+            'sampling_rate': 0.5,  # 50% sampling
+            'export_interval': 60,  # 1 minute
+            'disable_input_output_logging': False,
+        }
+    else:  # development
+        return {
+            'sampling_rate': 1.0,  # 100% sampling
+            'export_interval': 10,  # 10 seconds
+            'disable_input_output_logging': False,
+            'force_dev_export': True,
+        }
+
+enable_firebase_monitoring(**get_observability_config())
+```
+
+
+
+## Resource Management
+
+### Memory and CPU Optimization
+
+Monitor and optimize resource usage:
+
+
+```ts
+// Monitor memory usage
+const memoryUsage = meter.createHistogram('genkit_memory_usage_mb', {
+  description: 'Memory usage in megabytes',
+});
+
+// Track memory periodically
+setInterval(() => {
+  const usage = process.memoryUsage();
+  memoryUsage.record(usage.heapUsed / 1024 / 1024, {
+    type: 'heap_used',
+  });
+  memoryUsage.record(usage.rss / 1024 / 1024, {
+    type: 'rss',
+  });
+}, 30000); // Every 30 seconds
+
+// Optimize telemetry for memory-constrained environments
+enableFirebaseTelemetry({
+  autoInstrumentationConfig: {
+    // Disable memory-intensive instrumentations
+    '@opentelemetry/instrumentation-fs': { enabled: false },
+    '@opentelemetry/instrumentation-dns': { enabled: false },
+  },
+  // Reduce batch sizes
+  spanProcessorConfig: {
+    maxExportBatchSize: 256,
+    maxQueueSize: 1024,
+  },
+});
+```
+
+
+```go
+import (
+    "context"
+    "log"
+    "runtime"
+
+    "go.opentelemetry.io/otel"
+    "go.opentelemetry.io/otel/attribute"
+    "go.opentelemetry.io/otel/metric"
+)
+
+func monitorResources() {
+    meter := otel.Meter("genkit-app")
+    memoryGauge, _ := meter.Float64ObservableGauge("genkit_memory_usage_mb")
+
+    // Register callback for memory monitoring
+    _, err := meter.RegisterCallback(
+        func(ctx context.Context, o metric.Observer) error {
+            var m runtime.MemStats
+            runtime.ReadMemStats(&m)
+
+            o.ObserveFloat64(memoryGauge, float64(m.Alloc)/1024/1024,
+                metric.WithAttributes(attribute.String("type", "alloc")))
+            o.ObserveFloat64(memoryGauge, float64(m.Sys)/1024/1024,
+                metric.WithAttributes(attribute.String("type", "sys")))
+
+            return nil
+        },
+        memoryGauge,
+    )
+    if err != nil {
+        log.Printf("Failed to register memory callback: %v", err)
+    }
+}
+```
+
+
+```python
+import psutil
+from opentelemetry.metrics import CallbackOptions, Observation
+
+# Observable gauges are read through callbacks at each export interval,
+# so no background thread or manual .set() calls are needed
+def observe_memory(options: CallbackOptions):
+    memory_info = psutil.Process().memory_info()
+    yield Observation(memory_info.rss / 1024 / 1024, {"type": "rss"})
+    yield Observation(memory_info.vms / 1024 / 1024, {"type": "vms"})
+
+memory_gauge = meter.create_observable_gauge(
+    "genkit_memory_usage_mb",
+    callbacks=[observe_memory],
+    description="Memory usage in megabytes",
+)
+
+# Optimize for memory-constrained environments
+enable_firebase_monitoring(
+    batch_size=256,
+    queue_size=1024,
+    export_interval=60,  # Longer intervals
+)
+```
+
+
+
+## Best Practices Summary
+
+### Development Environment
+
+1. **Enable verbose logging**: Use 100% sampling and short export intervals
+2. **Test monitoring setup**: Verify telemetry collection before deploying
+3. **Use the Developer UI**: Take advantage of built-in observability tools
+4. 
**Monitor resource usage**: Ensure observability doesn't impact development + +### Staging Environment + +1. **Moderate sampling**: Use 50% sampling to balance cost and visibility +2. **Test production config**: Validate monitoring configuration +3. **Performance testing**: Measure observability overhead +4. **Security testing**: Verify data privacy controls + +### Production Environment + +1. **Optimize sampling**: Use 10% or lower sampling for high-volume applications +2. **Protect sensitive data**: Disable input/output logging for sensitive information +3. **Monitor costs**: Track telemetry expenses and optimize accordingly +4. **Set up alerting**: Configure alerts for critical metrics and errors + +### Security Considerations + +1. **Data privacy**: Implement proper data sanitization and filtering +2. **Access control**: Limit who can view observability data +3. **Credential security**: Use proper IAM roles and rotate keys regularly +4. **Network security**: Ensure secure connections to monitoring services + +## Next Steps + +- Learn about [troubleshooting common issues](/unified-docs/observability/troubleshooting) +- Explore the main [observability and monitoring guide](/unified-docs/observability-monitoring) +- Set up [authentication and permissions](/unified-docs/observability/authentication) +- Check out [deployment guides](/unified-docs/deployment) for production configurations diff --git a/src/content/docs/unified-docs/observability/authentication.mdx b/src/content/docs/unified-docs/observability/authentication.mdx new file mode 100644 index 00000000..0434183b --- /dev/null +++ b/src/content/docs/unified-docs/observability/authentication.mdx @@ -0,0 +1,442 @@ +--- +title: Authentication and Setup +description: Learn how to set up authentication and permissions for Genkit observability and monitoring across JavaScript, Go, and Python environments. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +This guide covers the authentication and setup requirements for enabling Genkit observability and monitoring in production environments. + +## Prerequisites + +Before setting up observability, ensure you have: + +1. **Firebase Project**: A Firebase project with the Blaze pricing plan +2. **Google Cloud Project**: Access to the associated Google Cloud project +3. **Required APIs**: Enabled Google Cloud APIs for monitoring services +4. **Service Account**: Proper service account with necessary permissions + +## Required Google Cloud APIs + +Enable these APIs in your Google Cloud project: + +- [Cloud Logging API](https://console.cloud.google.com/apis/library/logging.googleapis.com) +- [Cloud Trace API](https://console.cloud.google.com/apis/library/cloudtrace.googleapis.com) +- [Cloud Monitoring API](https://console.cloud.google.com/apis/library/monitoring.googleapis.com) + +You can enable these APIs using the Google Cloud Console or the `gcloud` CLI: + +```bash +gcloud services enable logging.googleapis.com +gcloud services enable cloudtrace.googleapis.com +gcloud services enable monitoring.googleapis.com +``` + +## Service Account Setup + +### Required IAM Roles + +Grant the following roles to your service account: + +- **Monitoring Metric Writer** (`roles/monitoring.metricWriter`) +- **Cloud Trace Agent** (`roles/cloudtrace.agent`) +- **Logs Writer** (`roles/logging.logWriter`) + +### Creating a Service Account + + + + 1. Go to the [IAM & Admin > Service Accounts](https://console.cloud.google.com/iam-admin/serviceaccounts) page + 2. 
Click **Create Service Account** + 3. Enter a name and description for the service account + 4. Click **Create and Continue** + 5. Add the required roles: + - Monitoring Metric Writer + - Cloud Trace Agent + - Logs Writer + 6. Click **Continue** and then **Done** + 7. Download the service account key JSON file + + + ```bash + # Create service account + gcloud iam service-accounts create genkit-monitoring \ + --display-name="Genkit Monitoring Service Account" + + # Get your project ID + PROJECT_ID=$(gcloud config get-value project) + + # Grant required roles + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:genkit-monitoring@$PROJECT_ID.iam.gserviceaccount.com" \ + --role="roles/monitoring.metricWriter" + + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:genkit-monitoring@$PROJECT_ID.iam.gserviceaccount.com" \ + --role="roles/cloudtrace.agent" + + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:genkit-monitoring@$PROJECT_ID.iam.gserviceaccount.com" \ + --role="roles/logging.logWriter" + + # Create and download key + gcloud iam service-accounts keys create genkit-monitoring-key.json \ + --iam-account=genkit-monitoring@$PROJECT_ID.iam.gserviceaccount.com + ``` + + + +## Authentication Configuration + +### Environment Variables + +Set up authentication using environment variables: + +```bash +# Path to service account key file +export GOOGLE_APPLICATION_CREDENTIALS="/path/to/genkit-monitoring-key.json" + +# Or set the project ID directly +export GOOGLE_CLOUD_PROJECT="your-project-id" +``` + +### Application Default Credentials + +For production deployments, use Application Default Credentials (ADC): + + + + Cloud Functions automatically use the default service account. 
Ensure it has the required roles: + + ```bash + # Get the default service account + PROJECT_ID=$(gcloud config get-value project) + DEFAULT_SA="$PROJECT_ID@appspot.gserviceaccount.com" + + # Grant required roles + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:$DEFAULT_SA" \ + --role="roles/monitoring.metricWriter" + + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:$DEFAULT_SA" \ + --role="roles/cloudtrace.agent" + + gcloud projects add-iam-policy-binding $PROJECT_ID \ + --member="serviceAccount:$DEFAULT_SA" \ + --role="roles/logging.logWriter" + ``` + + + Specify a custom service account when deploying: + + ```bash + gcloud run deploy genkit-app \ + --image gcr.io/PROJECT_ID/genkit-app \ + --service-account genkit-monitoring@PROJECT_ID.iam.gserviceaccount.com \ + --region us-central1 + ``` + + + Attach the service account to your VM instance: + + ```bash + gcloud compute instances create genkit-vm \ + --service-account genkit-monitoring@PROJECT_ID.iam.gserviceaccount.com \ + --scopes cloud-platform + ``` + + + +## Language-Specific Setup + +### JavaScript/Node.js + + + + ```bash + # Set environment variables + export GOOGLE_APPLICATION_CREDENTIALS="/path/to/key.json" + export ENABLE_FIREBASE_MONITORING=true + ``` + + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + + // Automatically uses GOOGLE_APPLICATION_CREDENTIALS + enableFirebaseTelemetry(); + ``` + + + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + + enableFirebaseTelemetry({ + projectId: 'your-project-id', + // Optional: specify credentials path + keyFilename: '/path/to/key.json', + }); + ``` + + + +### Go + + + + ```bash + export GOOGLE_APPLICATION_CREDENTIALS="/path/to/key.json" + export GOOGLE_CLOUD_PROJECT="your-project-id" + ``` + + ```go + import ( + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{}), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + ```go + import ( + "google.golang.org/api/option" + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{ + ProjectID: "your-project-id", + ClientOptions: []option.ClientOption{ + option.WithCredentialsFile("/path/to/key.json"), + }, + }), + ) + if err != nil { + log.Fatal(err) + } + } + ``` + + + +### Python + + + + ```bash + export GOOGLE_APPLICATION_CREDENTIALS="/path/to/key.json" + export GOOGLE_CLOUD_PROJECT="your-project-id" + ``` + + ```python + from genkit.ai import Genkit + from genkit.monitoring import enable_firebase_monitoring + + # Automatically uses environment variables + enable_firebase_monitoring() + + ai = Genkit() + ``` + + + ```python + from genkit.ai import Genkit + from genkit.monitoring import enable_firebase_monitoring + + enable_firebase_monitoring( + project_id='your-project-id', + credentials_path='/path/to/key.json' + ) + + ai = Genkit() + ``` + + + +## Local Development Authentication + +For local development and testing, you can use your personal Google Cloud credentials: + +### Using gcloud CLI + +```bash +# Authenticate with your Google account +gcloud auth application-default login + +# Set the project +gcloud config set project your-project-id +``` + +### Impersonating Service Accounts + +For testing with service account permissions locally: + +```bash +# Impersonate 
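a service account:
+# (Optional) confirm the account exists first; the account name is the one
+# created earlier in this guide:
+# gcloud iam service-accounts describe genkit-monitoring@your-project-id.iam.gserviceaccount.com
+#
+# Impersonate 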
the service account +gcloud auth application-default login \ + --impersonate-service-account genkit-monitoring@your-project-id.iam.gserviceaccount.com +``` + +**Note**: You need the `roles/iam.serviceAccountTokenCreator` role to impersonate service accounts. + +## Testing Authentication + +Verify your authentication setup: + + + + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + + // Enable with local testing + enableFirebaseTelemetry({ + forceDevExport: true, + metricExportIntervalMillis: 10_000, // 10 seconds for testing + }); + + // Test with a simple flow + export const testFlow = ai.defineFlow( + { name: 'testFlow' }, + async () => { + console.log('Testing observability...'); + return 'Authentication successful!'; + } + ); + ``` + + + ```go + func testAuthentication() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{}), + ) + if err != nil { + log.Fatalf("Authentication failed: %v", err) + } + + testFlow := genkit.DefineFlow(g, "testFlow", + func(ctx context.Context, input string) (string, error) { + log.Println("Testing observability...") + return "Authentication successful!", nil + }) + + // Run the test flow + result, err := testFlow.Run(ctx, "test") + if err != nil { + log.Fatalf("Test failed: %v", err) + } + + log.Printf("Test result: %s", result) + } + ``` + + + ```python + from genkit.ai import Genkit + from genkit.monitoring import enable_firebase_monitoring + + try: + enable_firebase_monitoring(force_dev_export=True) + ai = Genkit() + + @ai.flow() + async def test_flow() -> str: + print("Testing observability...") + return "Authentication successful!" + + # Test the flow + result = await test_flow() + print(f"Test result: {result}") + + except Exception as e: + print(f"Authentication failed: {e}") + ``` + + + +## Security Best Practices + +### Service Account Security + +1. **Principle of Least Privilege**: Only grant the minimum required roles +2. **Key Rotation**: Regularly rotate service account keys +3. **Key Storage**: Store keys securely and never commit them to version control +4. **Access Monitoring**: Monitor service account usage and access patterns + +### Environment Security + +1. **Environment Separation**: Use different service accounts for dev/staging/prod +2. **Secret Management**: Use secret management services for credentials +3. **Network Security**: Restrict network access to monitoring endpoints +4. 
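**Key inventory**: periodically list each service account's keys so stale ones can be rotated or revoked (account name as created earlier in this guide):
+
+   ```bash
+   gcloud iam service-accounts keys list \
+     --iam-account genkit-monitoring@your-project-id.iam.gserviceaccount.com
+   ```
+
+5. 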
**Audit Logging**: Enable audit logging for service account activities + +### Credential Management + +```bash +# Example: Using Google Secret Manager +gcloud secrets create genkit-monitoring-key --data-file=key.json + +# Grant access to the secret +gcloud secrets add-iam-policy-binding genkit-monitoring-key \ + --member="serviceAccount:your-app@your-project.iam.gserviceaccount.com" \ + --role="roles/secretmanager.secretAccessor" +``` + +## Troubleshooting Authentication + +### Common Issues + +**Permission Denied Errors**: +- Verify service account has required roles +- Check that APIs are enabled +- Ensure credentials are properly configured + +**Authentication Not Found**: +- Verify `GOOGLE_APPLICATION_CREDENTIALS` path +- Check service account key file exists and is readable +- Ensure project ID is correctly set + +**Quota Exceeded**: +- Check API quotas and limits +- Verify billing is enabled for the project +- Monitor usage in the Google Cloud Console + +### Debugging Commands + +```bash +# Check current authentication +gcloud auth list + +# Verify project configuration +gcloud config list + +# Test API access +gcloud logging logs list --limit=1 + +# Check service account permissions +gcloud projects get-iam-policy your-project-id \ + --flatten="bindings[].members" \ + --filter="bindings.members:serviceAccount:your-sa@your-project.iam.gserviceaccount.com" +``` + +## Next Steps + +- Configure [advanced monitoring options](/unified-docs/observability/advanced-configuration) +- Learn about [troubleshooting common issues](/unified-docs/observability/troubleshooting) +- Explore the main [observability and monitoring guide](/unified-docs/observability-monitoring) +- Set up [production deployment](/unified-docs/deployment) with proper monitoring diff --git a/src/content/docs/unified-docs/observability/overview.mdx b/src/content/docs/unified-docs/observability/overview.mdx new file mode 100644 index 00000000..df8e4256 --- /dev/null +++ b/src/content/docs/unified-docs/observability/overview.mdx @@ -0,0 +1,242 @@ +--- +title: Observability Overview +description: Get started with Genkit's observability features, including local development tools, production monitoring, and telemetry configuration across JavaScript, Go, and Python. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +This guide provides an overview of Genkit's observability capabilities and helps you get started with monitoring your AI workflows in both development and production environments. + +## What is Observability? + +Observability in Genkit refers to the ability to understand the internal state of your AI workflows by examining their outputs. This includes: + +- **Tracing**: Following the execution path of your flows step-by-step +- **Metrics**: Collecting quantitative data about performance and usage +- **Logging**: Recording events and debugging information +- **Monitoring**: Real-time visibility into system health and performance + +## Observability Layers + +### 1. Local Development + +During development, Genkit provides built-in observability through the Developer UI: + +- **Automatic trace collection**: No configuration required +- **Interactive debugging**: Step through flow execution +- **Real-time inspection**: View inputs, outputs, and intermediate states +- **Performance analysis**: Identify bottlenecks and optimization opportunities + +### 2. 
Production Monitoring
+
+For deployed applications, Genkit offers production-grade monitoring:
+
+- **Firebase Genkit Monitoring**: Integrated dashboard for Firebase projects
+- **OpenTelemetry export**: Send data to any observability platform
+- **Custom metrics**: Application-specific monitoring
+- **Alerting and dashboards**: Proactive monitoring and visualization
+
+## Key Features
+
+### Automatic Instrumentation
+
+Genkit automatically instruments your code to collect telemetry data:
+
+
+
+    ```ts
+    import { genkit } from 'genkit';
+    import { googleAI } from '@genkit-ai/googleai';
+
+    // Observability is automatically enabled
+    const ai = genkit({
+      plugins: [googleAI()],
+    });
+
+    // All flows are automatically traced
+    export const myFlow = ai.defineFlow(
+      { name: 'myFlow' },
+      async (input) => {
+        // This execution will be automatically traced
+        return await ai.generate({
+          model: googleAI.model('gemini-1.5-flash'),
+          prompt: `Process this: ${input}`,
+        });
+      }
+    );
+    ```
+
+
+    ```go
+    import (
+        "context"
+        "log"
+
+        "github.com/firebase/genkit/go/genkit"
+    )
+
+    func main() {
+        ctx := context.Background()
+
+        // Observability is built-in
+        g, err := genkit.Init(ctx)
+        if err != nil {
+            log.Fatal(err)
+        }
+
+        // Flows are automatically instrumented
+        myFlow := genkit.DefineFlow(g, "myFlow",
+            func(ctx context.Context, input string) (string, error) {
+                // Execution is automatically traced
+                return processData(input)
+            })
+    }
+    ```
+
+
+    ```python
+    from genkit.ai import Genkit
+
+    # Observability is enabled by default
+    ai = Genkit()
+
+    @ai.flow()
+    async def my_flow(input: str) -> str:
+        # This flow execution is automatically traced
+        result = await ai.generate(
+            model='gemini-1.5-flash',
+            prompt=f'Process this: {input}',
+        )
+        return result.text
+    ```
+
+
+
+### Developer UI Integration
+
+The Developer UI provides comprehensive observability during development:
+
+1. **Flow Execution Traces**: See every step of your flow execution
+2. **Input/Output Inspection**: Examine data at each stage
+3. **Performance Metrics**: View timing and resource usage
+4. **Error Analysis**: Debug failures with detailed stack traces
+5. **Interactive Testing**: Run flows with different inputs
+
+### Production Telemetry
+
+For production environments, Genkit exports telemetry data to monitoring platforms:
+
+- **Metrics**: Performance indicators, error rates, usage statistics
+- **Traces**: Detailed execution paths for debugging
+- **Logs**: Structured logging with context
+- **Custom Events**: Application-specific monitoring points
+
+## Getting Started
+
+### 1. Local Development
+
+Start the Developer UI with your app's entrypoint to begin observing your flows:
+
+```bash
+# JavaScript
+npx genkit start -- npm run dev
+
+# Go
+genkit start -- go run .
+
+# Python
+genkit start -- python app.py
+```
+
+### 2. Production Setup
+
+Enable production monitoring by configuring telemetry export:
+
+
+
+    ```bash
+    # Install Firebase plugin
+    npm install @genkit-ai/firebase
+
+    # Enable monitoring
+    export ENABLE_FIREBASE_MONITORING=true
+    ```
+
+
+    ```bash
+    # Install Google Cloud plugin
+    go get github.com/firebase/genkit/go/plugins/googlecloud
+    ```
+
+
+    ```bash
+    # Install monitoring dependencies
+    pip install genkit[monitoring]
+    ```
+
+
+
+### 3. Configure Permissions
+
+Set up the required Google Cloud permissions:
+
+- **Monitoring Metric Writer** (`roles/monitoring.metricWriter`)
+- **Cloud Trace Agent** (`roles/cloudtrace.agent`)
+- **Logs Writer** (`roles/logging.logWriter`)
+
+## Observability Best Practices
+
+### Development
+
+1. 
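**Generate traces on demand**: with the Developer UI running, invoke a flow from the CLI so there is a fresh trace to inspect (the flow name is illustrative):
+
+   ```bash
+   genkit flow:run myFlow '"test input"'
+   ```
+
+2. 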
**Use the Developer UI**: Take advantage of built-in tracing +2. **Test with realistic data**: Use production-like inputs for testing +3. **Monitor performance**: Identify bottlenecks early +4. **Debug systematically**: Use traces to understand flow behavior + +### Production + +1. **Implement monitoring**: Set up dashboards and alerts +2. **Control costs**: Use sampling and filtering appropriately +3. **Protect sensitive data**: Configure input/output logging carefully +4. **Regular reviews**: Analyze metrics and optimize performance + +### Security + +1. **Secure credentials**: Use proper IAM roles and service accounts +2. **Data privacy**: Disable logging of sensitive information +3. **Network security**: Ensure secure connections to monitoring services +4. **Access control**: Limit who can view observability data + +## Common Use Cases + +### Debugging Flow Issues + +Use observability to diagnose problems: + +1. **Trace analysis**: Follow execution path to find failures +2. **Input/output inspection**: Verify data transformations +3. **Performance profiling**: Identify slow operations +4. **Error correlation**: Connect errors to specific inputs or conditions + +### Performance Optimization + +Leverage metrics to improve performance: + +1. **Latency analysis**: Find and optimize slow operations +2. **Resource usage**: Monitor memory and CPU consumption +3. **Throughput measurement**: Track requests per second +4. **Cost optimization**: Identify expensive operations + +### Production Monitoring + +Maintain system health in production: + +1. **Health dashboards**: Monitor key metrics and trends +2. **Alerting**: Get notified of issues before users are affected +3. **Capacity planning**: Use metrics to plan for growth +4. **Incident response**: Use traces and logs for troubleshooting + +## Next Steps + +- Learn about [authentication and setup](/unified-docs/observability/authentication) for production monitoring +- Explore [advanced configuration](/unified-docs/observability/advanced-configuration) options +- Check out [troubleshooting](/unified-docs/observability/troubleshooting) common issues +- Read the main [observability and monitoring](/unified-docs/observability-monitoring) guide for detailed implementation diff --git a/src/content/docs/unified-docs/observability/troubleshooting.mdx b/src/content/docs/unified-docs/observability/troubleshooting.mdx new file mode 100644 index 00000000..e6d0de80 --- /dev/null +++ b/src/content/docs/unified-docs/observability/troubleshooting.mdx @@ -0,0 +1,764 @@ +--- +title: Troubleshooting +description: Common issues and solutions for Genkit observability and monitoring across JavaScript, Go, and Python environments. +--- + +import { Tabs, TabItem } from '@astrojs/starlight/components'; + +This guide helps you diagnose and resolve common issues with Genkit's observability and monitoring features. + +## Common Issues + +### Metrics Not Appearing in Dashboard + +**Symptoms:** +- No metrics visible in the Firebase Genkit Monitoring dashboard +- Empty charts or "No data available" messages +- Metrics appear locally but not in production + +**Possible Causes and Solutions:** + + + + **1. Check Firebase plugin configuration:** + ```ts + import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; + + // Verify telemetry is enabled + enableFirebaseTelemetry({ + forceDevExport: true, // For testing + metricExportIntervalMillis: 10_000, // Faster for testing + }); + ``` + + **2. 
Verify environment variables:** + ```bash + echo $GOOGLE_APPLICATION_CREDENTIALS + echo $GOOGLE_CLOUD_PROJECT + echo $ENABLE_FIREBASE_MONITORING + ``` + + **3. Check API permissions:** + ```bash + # Test if APIs are accessible + gcloud logging logs list --limit=1 + gcloud monitoring metrics list --limit=1 + ``` + + + **1. Verify Google Cloud plugin setup:** + ```go + import ( + "github.com/firebase/genkit/go/plugins/googlecloud" + ) + + func main() { + ctx := context.Background() + + g, err := genkit.Init(ctx, + genkit.WithPlugins(&googlecloud.GoogleCloud{}), + ) + if err != nil { + log.Fatalf("Failed to initialize Genkit: %v", err) + } + + log.Println("Genkit initialized successfully") + } + ``` + + **2. Check environment setup:** + ```bash + echo $GOOGLE_APPLICATION_CREDENTIALS + echo $GOOGLE_CLOUD_PROJECT + ``` + + **3. Test authentication:** + ```go + // Add to your main function for testing + client, err := monitoring.NewMetricClient(ctx) + if err != nil { + log.Fatalf("Failed to create monitoring client: %v", err) + } + defer client.Close() + log.Println("Monitoring client created successfully") + ``` + + + **1. Verify monitoring setup:** + ```python + from genkit.ai import Genkit + from genkit.monitoring import enable_firebase_monitoring + + try: + enable_firebase_monitoring(force_dev_export=True) + print("Monitoring enabled successfully") + except Exception as e: + print(f"Failed to enable monitoring: {e}") + + ai = Genkit() + ``` + + **2. Check dependencies:** + ```bash + pip list | grep -E "(genkit|google|opentelemetry)" + ``` + + **3. Test authentication:** + ```python + from google.cloud import monitoring_v3 + + try: + client = monitoring_v3.MetricServiceClient() + project_name = f"projects/{your_project_id}" + metrics = client.list_metric_descriptors(name=project_name) + print("Authentication successful") + except Exception as e: + print(f"Authentication failed: {e}") + ``` + + + +### Authentication and Permission Issues + +**Symptoms:** +- "Permission denied" errors +- "Authentication failed" messages +- 403 or 401 HTTP status codes + +**Solutions:** + +**1. Verify IAM roles:** +```bash +# Check service account permissions +gcloud projects get-iam-policy YOUR_PROJECT_ID \ + --flatten="bindings[].members" \ + --filter="bindings.members:serviceAccount:YOUR_SERVICE_ACCOUNT" +``` + +**2. Required roles checklist:** +- ✅ `roles/monitoring.metricWriter` +- ✅ `roles/cloudtrace.agent` +- ✅ `roles/logging.logWriter` + +**3. Test service account:** +```bash +# Impersonate service account for testing +gcloud auth application-default login \ + --impersonate-service-account YOUR_SERVICE_ACCOUNT +``` + +**4. Verify API enablement:** +```bash +gcloud services list --enabled --filter="name:(logging|monitoring|cloudtrace)" +``` + +### High Telemetry Costs + +**Symptoms:** +- Unexpected Google Cloud billing charges +- High volume of telemetry data +- Performance impact from telemetry collection + +**Solutions:** + + + + **1. Implement sampling:** + ```ts + import { TraceIdRatioBasedSampler } from '@opentelemetry/sdk-trace-base'; + + enableFirebaseTelemetry({ + // Sample only 10% of traces + sampler: new TraceIdRatioBasedSampler(0.1), + + // Increase export intervals + metricExportIntervalMillis: 600_000, // 10 minutes + + // Disable input/output logging + disableLoggingInputAndOutput: true, + }); + ``` + + **2. 
Disable unnecessary instrumentation:**
+    ```ts
+    enableFirebaseTelemetry({
+      autoInstrumentationConfig: {
+        '@opentelemetry/instrumentation-fs': { enabled: false },
+        '@opentelemetry/instrumentation-dns': { enabled: false },
+        '@opentelemetry/instrumentation-net': { enabled: false },
+      },
+    });
+    ```
+
+
+    **1. Configure sampling:**
+    ```go
+    import "go.opentelemetry.io/otel/sdk/trace"
+
+    // 10% sampling
+    sampler := trace.TraceIDRatioBased(0.1)
+
+    tp := trace.NewTracerProvider(
+        trace.WithSampler(sampler),
+    )
+
+    otel.SetTracerProvider(tp)
+    ```
+
+    **2. Optimize batch processing:**
+    ```go
+    batchProcessor := trace.NewBatchSpanProcessor(
+        exporter,
+        trace.WithBatchTimeout(60*time.Second), // Longer batches
+        trace.WithMaxExportBatchSize(1024),     // Larger batches
+    )
+    ```
+
+
+    **1. Reduce sampling rate:**
+    ```python
+    enable_firebase_monitoring(
+        sampling_rate=0.1,  # 10% sampling
+        export_interval=600,  # 10 minutes
+        disable_input_output_logging=True,
+    )
+    ```
+
+    **2. Optimize batch settings:**
+    ```python
+    from opentelemetry.sdk.trace.export import BatchSpanProcessor
+
+    batch_processor = BatchSpanProcessor(
+        exporter,
+        max_export_batch_size=1024,
+        schedule_delay_millis=60000,  # 1 minute
+    )
+    ```
+
+
+
+### Missing Traces in Production
+
+**Symptoms:**
+- Traces visible in development but not production
+- Incomplete trace data
+- Gaps in trace timeline
+
+**Solutions:**
+
+**1. Check sampling configuration:**
+- Ensure sampling rate isn't too low
+- Verify parent-based sampling isn't dropping traces
+- Test with higher sampling rates temporarily
+
+**2. Verify network connectivity:**
+```bash
+# Test connectivity to Google Cloud endpoints
+curl -I https://cloudtrace.googleapis.com/
+curl -I https://monitoring.googleapis.com/
+```
+
+**3. Check export configuration:**
+
+
+
+    ```ts
+    // Add debugging to trace export
+    enableFirebaseTelemetry({
+      // Enable debug logging
+      debug: true,
+
+      // Reduce export timeout for testing
+      metricExportTimeoutMillis: 10_000,
+
+      // Force export in all environments
+      forceDevExport: true,
+    });
+    ```
+
+
+    ```go
+    // Add logging to trace export
+    import "go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
+
+    // For debugging, add stdout exporter
+    stdoutExporter, err := stdouttrace.New(
+        stdouttrace.WithPrettyPrint(),
+    )
+    if err != nil {
+        log.Fatal(err)
+    }
+
+    tp := trace.NewTracerProvider(
+        trace.WithSpanProcessor(trace.NewSimpleSpanProcessor(stdoutExporter)),
+    )
+    ```
+
+
+    ```python
+    # Enable debug logging
+    import logging
+    logging.basicConfig(level=logging.DEBUG)
+
+    # Add console exporter for debugging
+    from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor
+
+    console_exporter = ConsoleSpanExporter()
+    trace_provider.add_span_processor(
+        SimpleSpanProcessor(console_exporter)
+    )
+    ```
+
+
+
+### Performance Issues
+
+**Symptoms:**
+- Increased application latency
+- High CPU or memory usage
+- Slow response times
+
+**Solutions:**
+
+**1. 
Optimize telemetry overhead:** + + + + ```ts + // Reduce telemetry overhead + enableFirebaseTelemetry({ + // Use batch processing + spanProcessorConfig: { + maxExportBatchSize: 512, + scheduledDelayMillis: 5000, + maxQueueSize: 2048, + }, + + // Disable expensive instrumentations + autoInstrumentationConfig: { + '@opentelemetry/instrumentation-fs': { enabled: false }, + '@opentelemetry/instrumentation-dns': { enabled: false }, + }, + }); + ``` + + + ```go + // Optimize span processing + batchProcessor := trace.NewBatchSpanProcessor( + exporter, + trace.WithBatchTimeout(5*time.Second), + trace.WithMaxExportBatchSize(512), + trace.WithMaxQueueSize(2048), + ) + + tp := trace.NewTracerProvider( + trace.WithSpanProcessor(batchProcessor), + trace.WithSampler(trace.TraceIDRatioBased(0.1)), + ) + ``` + + + ```python + # Optimize performance + enable_firebase_monitoring( + sampling_rate=0.1, + batch_size=512, + schedule_delay_millis=5000, + max_queue_size=2048, + ) + ``` + + + +**2. Monitor resource usage:** + + + + ```ts + // Monitor memory usage + setInterval(() => { + const usage = process.memoryUsage(); + console.log('Memory usage:', { + heapUsed: Math.round(usage.heapUsed / 1024 / 1024) + 'MB', + heapTotal: Math.round(usage.heapTotal / 1024 / 1024) + 'MB', + rss: Math.round(usage.rss / 1024 / 1024) + 'MB', + }); + }, 30000); + ``` + + + ```go + // Monitor memory usage + import "runtime" + + func logMemoryUsage() { + var m runtime.MemStats + runtime.ReadMemStats(&m) + + log.Printf("Memory usage: Alloc=%dMB, Sys=%dMB, NumGC=%d", + m.Alloc/1024/1024, m.Sys/1024/1024, m.NumGC) + } + + // Call periodically + go func() { + for { + logMemoryUsage() + time.Sleep(30 * time.Second) + } + }() + ``` + + + ```python + import psutil + import threading + import time + + def monitor_memory(): + while True: + process = psutil.Process() + memory_info = process.memory_info() + print(f"Memory usage: RSS={memory_info.rss // 1024 // 1024}MB, " + f"VMS={memory_info.vms // 1024 // 1024}MB") + time.sleep(30) + + # Start monitoring thread + monitor_thread = threading.Thread(target=monitor_memory, daemon=True) + monitor_thread.start() + ``` + + + +## Debugging Tools + +### Enable Debug Logging + + + + ```ts + // Enable OpenTelemetry debug logging + process.env.OTEL_LOG_LEVEL = 'debug'; + + // Enable Genkit debug logging + import { logger } from 'genkit/logging'; + logger.setLogLevel('debug'); + + // Log telemetry configuration + enableFirebaseTelemetry({ + debug: true, + // ... 
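remaining options
+      // For a quick local check, the options shown earlier in this guide
+      // are handy here, e.g.:
+      // metricExportIntervalMillis: 10_000,
+      // forceDevExport: true,
+      // ... 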
other config
+    });
+    ```
+
+
+    ```go
+    import (
+        "log/slog"
+        "os"
+    )
+
+    // Enable debug logging
+    logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{
+        Level: slog.LevelDebug,
+    }))
+    slog.SetDefault(logger)
+
+    // Log telemetry events
+    slog.Debug("Telemetry configuration", "config", config)
+    ```
+
+
+    ```python
+    import logging
+    import os
+
+    # Enable debug logging
+    logging.basicConfig(
+        level=logging.DEBUG,
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+    )
+
+    # Enable OpenTelemetry debug logging
+    os.environ['OTEL_LOG_LEVEL'] = 'debug'
+
+    # Log monitoring setup
+    logger = logging.getLogger(__name__)
+    logger.debug("Enabling Firebase monitoring")
+    ```
+
+
+
+### Test Telemetry Export
+
+
+
+    ```ts
+    // Test telemetry export with console output
+    import { ConsoleSpanExporter, SimpleSpanProcessor } from '@opentelemetry/sdk-trace-base';
+
+    const consoleExporter = new ConsoleSpanExporter();
+    const processor = new SimpleSpanProcessor(consoleExporter);
+
+    // Add to tracer provider for debugging
+    const tp = new TracerProvider({
+      spanProcessors: [processor],
+    });
+    ```
+
+
+    ```go
+    // Test with stdout exporter
+    import "go.opentelemetry.io/otel/exporters/stdout/stdouttrace"
+
+    stdoutExporter, err := stdouttrace.New(
+        stdouttrace.WithPrettyPrint(),
+    )
+    if err != nil {
+        log.Fatal(err)
+    }
+
+    tp := trace.NewTracerProvider(
+        trace.WithSpanProcessor(trace.NewSimpleSpanProcessor(stdoutExporter)),
+    )
+    ```
+
+
+    ```python
+    # Test with console exporter
+    from opentelemetry.sdk.trace.export import ConsoleSpanExporter, SimpleSpanProcessor
+
+    console_exporter = ConsoleSpanExporter()
+    trace_provider.add_span_processor(
+        SimpleSpanProcessor(console_exporter)
+    )
+    ```
+
+
+
+### Validate Configuration
+
+
+
+    ```ts
+    // Configuration validation function
+    function validateTelemetryConfig() {
+      console.log('Environment variables:');
+      console.log('GOOGLE_APPLICATION_CREDENTIALS:', process.env.GOOGLE_APPLICATION_CREDENTIALS);
+      console.log('GOOGLE_CLOUD_PROJECT:', process.env.GOOGLE_CLOUD_PROJECT);
+      console.log('ENABLE_FIREBASE_MONITORING:', process.env.ENABLE_FIREBASE_MONITORING);
+
+      // Test API access
+      const { GoogleAuth } = require('google-auth-library');
+      const auth = new GoogleAuth();
+
+      auth.getProjectId()
+        .then(projectId => console.log('Project ID:', projectId))
+        .catch(err => console.error('Auth error:', err));
+    }
+
+    validateTelemetryConfig();
+    ```
+
+
+    ```go
+    import "golang.org/x/oauth2/google"
+
+    func validateTelemetryConfig() {
+        log.Println("Environment variables:")
+        log.Println("GOOGLE_APPLICATION_CREDENTIALS:", os.Getenv("GOOGLE_APPLICATION_CREDENTIALS"))
+        log.Println("GOOGLE_CLOUD_PROJECT:", os.Getenv("GOOGLE_CLOUD_PROJECT"))
+
+        // Test authentication
+        ctx := context.Background()
+        creds, err := google.FindDefaultCredentials(ctx)
+        if err != nil {
+            log.Printf("Credentials error: %v", err)
+        } else {
+            log.Printf("Project ID: %s", creds.ProjectID)
+        }
+    }
+    ```
+
+
+    ```python
+    def validate_telemetry_config():
+        import os
+        from google.auth import default
+
+        print("Environment variables:")
+        print(f"GOOGLE_APPLICATION_CREDENTIALS: {os.getenv('GOOGLE_APPLICATION_CREDENTIALS')}")
+        print(f"GOOGLE_CLOUD_PROJECT: {os.getenv('GOOGLE_CLOUD_PROJECT')}")
+
+        try:
+            credentials, project_id = default()
+            print(f"Project ID: {project_id}")
+            print("Authentication successful")
+        except Exception as e:
+            print(f"Authentication error: {e}")
+
+    # Run the validation once at startup:
+    
validate_telemetry_config() + ``` + + + +## Monitoring Health + +### Check Telemetry Pipeline + +```bash +# Check if telemetry data is being generated +gcloud logging read "resource.type=global" --limit=10 --format="table(timestamp,severity,textPayload)" + +# Check metrics +gcloud monitoring metrics list --filter="metric.type:genkit" --limit=10 + +# Check traces +gcloud trace list-traces --limit=10 +``` + +### Monitor Export Success + + + + ```ts + // Monitor export success/failure + import { metrics } from '@opentelemetry/api'; + + const meter = metrics.getMeter('telemetry-health'); + const exportSuccess = meter.createCounter('telemetry_export_success_total'); + const exportFailure = meter.createCounter('telemetry_export_failure_total'); + + // Custom exporter wrapper + class MonitoredExporter { + constructor(baseExporter) { + this.baseExporter = baseExporter; + } + + export(spans, resultCallback) { + this.baseExporter.export(spans, (result) => { + if (result.code === ExportResultCode.SUCCESS) { + exportSuccess.add(1); + } else { + exportFailure.add(1); + console.error('Export failed:', result.error); + } + resultCallback(result); + }); + } + } + ``` + + + ```go + // Monitor export health + type MonitoredExporter struct { + base trace.SpanExporter + successCounter metric.Int64Counter + failureCounter metric.Int64Counter + } + + func (e *MonitoredExporter) ExportSpans(ctx context.Context, spans []trace.ReadOnlySpan) error { + err := e.base.ExportSpans(ctx, spans) + if err != nil { + e.failureCounter.Add(ctx, 1) + log.Printf("Export failed: %v", err) + } else { + e.successCounter.Add(ctx, 1) + } + return err + } + ``` + + + ```python + # Monitor export health + from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult + + class MonitoredExporter(SpanExporter): + def __init__(self, base_exporter): + self.base_exporter = base_exporter + self.success_count = 0 + self.failure_count = 0 + + def export(self, spans): + try: + result = self.base_exporter.export(spans) + if result == SpanExportResult.SUCCESS: + self.success_count += 1 + else: + self.failure_count += 1 + print(f"Export failed: {result}") + return result + except Exception as e: + self.failure_count += 1 + print(f"Export error: {e}") + return SpanExportResult.FAILURE + ``` + + + +## Getting Help + +### Collect Diagnostic Information + +When reporting issues, include: + +1. **Environment details:** + - Operating system and version + - Language runtime version + - Genkit version + - Cloud environment (local, Cloud Run, etc.) + +2. **Configuration:** + - Telemetry configuration + - Environment variables + - IAM roles and permissions + +3. **Error messages:** + - Complete error logs + - Stack traces + - Console output + +4. 
**Reproduction steps:** + - Minimal code example + - Steps to reproduce the issue + - Expected vs actual behavior + +### Useful Commands + +```bash +# Check Genkit version +npm list genkit # JavaScript +go list -m github.com/firebase/genkit/go # Go +pip show genkit # Python + +# Check Google Cloud SDK +gcloud version + +# Test authentication +gcloud auth list +gcloud config list + +# Check API access +gcloud services list --enabled +gcloud projects get-iam-policy PROJECT_ID + +# View logs +gcloud logging read "resource.type=global" --limit=50 +``` + +### Community Resources + +- [Genkit GitHub Issues](https://github.com/firebase/genkit/issues) +- [Firebase Community](https://firebase.google.com/community) +- [Stack Overflow](https://stackoverflow.com/questions/tagged/genkit) +- [Google Cloud Support](https://cloud.google.com/support) + +## Next Steps + +- Review the main [observability and monitoring guide](/unified-docs/observability-monitoring) +- Check [authentication and setup](/unified-docs/observability/authentication) requirements +- Explore [advanced configuration](/unified-docs/observability/advanced-configuration) options +- Learn about [deployment best practices](/unified-docs/deployment) diff --git a/src/content/docs/unified-docs/plugin-authoring/models.mdx b/src/content/docs/unified-docs/plugin-authoring/models.mdx index e39e7432..c4b3e6d8 100644 --- a/src/content/docs/unified-docs/plugin-authoring/models.mdx +++ b/src/content/docs/unified-docs/plugin-authoring/models.mdx @@ -1114,7 +1114,7 @@ A model plugin consists of three main components: ## Next Steps -- Learn about [writing embedder plugins](/unified-docs/plugin-authoring/embedders) for text embedding models -- Explore [writing retriever plugins](/unified-docs/plugin-authoring/retrievers) for custom data sources -- See [telemetry plugins](/unified-docs/plugin-authoring/telemetry) for monitoring and observability +- Learn about writing embedder plugins for text embedding models +- Explore writing retriever plugins for custom data sources +- See telemetry plugins for monitoring and observability - Check out the [plugin authoring overview](/unified-docs/plugin-authoring/overview) for general plugin concepts diff --git a/src/content/docs/unified-docs/plugin-authoring/overview.mdx b/src/content/docs/unified-docs/plugin-authoring/overview.mdx index 5158fcb0..f120fb90 100644 --- a/src/content/docs/unified-docs/plugin-authoring/overview.mdx +++ b/src/content/docs/unified-docs/plugin-authoring/overview.mdx @@ -462,6 +462,6 @@ Configure observability and monitoring for Genkit applications. 
## Next Steps - Learn about [writing model plugins](/unified-docs/plugin-authoring/models) to add new AI models -- Explore [writing retriever plugins](/unified-docs/plugin-authoring/retrievers) for custom data sources -- See [writing embedder plugins](/unified-docs/plugin-authoring/embedders) for custom embedding models -- Check out [telemetry plugins](/unified-docs/plugin-authoring/telemetry) for monitoring and observability +- Explore writing retriever plugins for custom data sources +- See writing embedder plugins for custom embedding models +- Check out telemetry plugins for monitoring and observability diff --git a/src/sidebar.ts b/src/sidebar.ts index d1a01cae..ad78f80c 100644 --- a/src/sidebar.ts +++ b/src/sidebar.ts @@ -297,9 +297,8 @@ const UNIFIED_SIDEBAR = [ { label: "Pause generation using interrupts", slug: "unified-docs/interrupts" }, { label: "Retrieval-augmented generation (RAG)", slug: "unified-docs/rag" }, { label: "Building multi-agent systems", slug: "unified-docs/multi-agent-systems" }, - { label: "Evaluation", slug: "unified-docs/evaluation" }, - { label: "Observability and monitoring", slug: "unified-docs/observability-monitoring" }, { label: "Error handling", slug: "unified-docs/error-handling" }, + { label: "Evaluation", slug: "unified-docs/evaluation" }, ], }, { @@ -353,12 +352,13 @@ const UNIFIED_SIDEBAR = [ ], }, { - label: "Advanced Topics", - collapsed: true, + label: "Observability and Monitoring", items: [ - { label: "Observability and monitoring", slug: "unified-docs/observability-monitoring" }, - { label: "Error handling", slug: "unified-docs/error-handling" }, - { label: "Multi-agent systems", slug: "unified-docs/multi-agent-systems" }, + { label: "Overview", slug: "unified-docs/observability/overview" }, + { label: "Complete Guide", slug: "unified-docs/observability-monitoring" }, + { label: "Authentication & Setup", slug: "unified-docs/observability/authentication" }, + { label: "Advanced Configuration", slug: "unified-docs/observability/advanced-configuration" }, + { label: "Troubleshooting", slug: "unified-docs/observability/troubleshooting" }, ], }, ]; From 59313680e424615f7f581f8b61304b8b9ddb93af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=80=9Cchrisraygill=E2=80=9D?= Date: Fri, 8 Aug 2025 14:01:26 -0400 Subject: [PATCH 7/9] Update auth link to authorization in framework docs Fix broken authentication links in Express.js and Next.js framework documentation by updating the path from `/unified-docs/auth` to `/unified-docs/deployment/authorization` and correcting the link text from "authentication" to "authorization" patterns. --- src/content/docs/unified-docs/frameworks/express.mdx | 2 +- src/content/docs/unified-docs/frameworks/nextjs.mdx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/content/docs/unified-docs/frameworks/express.mdx b/src/content/docs/unified-docs/frameworks/express.mdx index fd1c9d2a..d099b881 100644 --- a/src/content/docs/unified-docs/frameworks/express.mdx +++ b/src/content/docs/unified-docs/frameworks/express.mdx @@ -962,7 +962,7 @@ Express.js integration is primarily available for JavaScript/Node.js. 
For other ## Next Steps - Learn about [creating flows](/unified-docs/creating-flows) to build more complex AI workflows -- Explore [authentication patterns](/unified-docs/auth) for securing your applications +- Explore [authorization patterns](/unified-docs/deployment/authorization) for securing your applications - See [deployment guides](/unified-docs/deployment) for production deployment strategies - Check out other framework integrations: - [Next.js](/unified-docs/frameworks/nextjs) for React applications diff --git a/src/content/docs/unified-docs/frameworks/nextjs.mdx b/src/content/docs/unified-docs/frameworks/nextjs.mdx index e596ee17..7bca3624 100644 --- a/src/content/docs/unified-docs/frameworks/nextjs.mdx +++ b/src/content/docs/unified-docs/frameworks/nextjs.mdx @@ -1135,7 +1135,7 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan ## Next Steps - Learn about [creating flows](/unified-docs/creating-flows) to build more complex AI workflows -- Explore [authentication patterns](/unified-docs/auth) for securing your applications +- Explore [authorization patterns](/unified-docs/deployment/authorization) for securing your applications - See [deployment guides](/unified-docs/deployment) for production deployment strategies - Check out other framework integrations: - [Express.js](/unified-docs/frameworks/express) for API-first applications From c3df74f6b84065e72dfb433871f2fc1495351596 Mon Sep 17 00:00:00 2001 From: Michael Bleigh Date: Fri, 8 Aug 2025 15:18:31 -0700 Subject: [PATCH 8/9] Switch to titlebar lang dropdown and use query params for lang switch. --- astro.config.mjs | 173 ++++++------ scripts/refactor-tabs.mjs | 57 ++++ src/components/LangTabItem.astro | 11 + src/components/LangTabs.astro | 38 +++ .../LanguagePreferenceSelector.astro | 90 ++++++ src/content/custom/head.astro | 3 +- src/content/custom/header.astro | 7 +- .../docs/unified-docs/chat-sessions.mdx | 115 ++++---- src/content/docs/unified-docs/context.mdx | 115 ++++---- .../docs/unified-docs/creating-flows.mdx | 179 ++++++------ src/content/docs/unified-docs/deployment.mdx | 143 +++++----- .../unified-docs/deployment/any-platform.mdx | 107 ++++---- .../unified-docs/deployment/authorization.mdx | 19 +- .../unified-docs/deployment/cloud-run.mdx | 67 ++--- .../docs/unified-docs/deployment/firebase.mdx | 131 ++++----- .../docs/unified-docs/developer-tools.mdx | 67 ++--- src/content/docs/unified-docs/dotprompt.mdx | 259 +++++++++--------- .../docs/unified-docs/error-handling.mdx | 115 ++++---- src/content/docs/unified-docs/evaluation.mdx | 245 ++++++++--------- .../docs/unified-docs/frameworks/express.mdx | 99 +++---- .../docs/unified-docs/frameworks/nextjs.mdx | 99 +++---- .../docs/unified-docs/generating-content.mdx | 195 ++++++------- src/content/docs/unified-docs/get-started.mdx | 131 ++++----- src/content/docs/unified-docs/interrupts.mdx | 115 ++++---- .../unified-docs/model-context-protocol.mdx | 115 ++++---- .../docs/unified-docs/multi-agent-systems.mdx | 115 ++++---- .../unified-docs/observability-monitoring.mdx | 99 +++---- .../observability/advanced-configuration.mdx | 163 +++++------ .../observability/authentication.mdx | 19 +- .../unified-docs/observability/overview.mdx | 35 +-- .../observability/troubleshooting.mdx | 147 +++++----- .../unified-docs/plugin-authoring/models.mdx | 99 +++---- .../plugin-authoring/overview.mdx | 67 ++--- .../docs/unified-docs/plugins/anthropic.mdx | 67 ++--- .../docs/unified-docs/plugins/deepseek.mdx | 131 ++++----- 
.../docs/unified-docs/plugins/google-ai.mdx | 131 ++++----- src/content/docs/unified-docs/plugins/mcp.mdx | 115 ++++---- .../docs/unified-docs/plugins/ollama.mdx | 83 +++--- .../docs/unified-docs/plugins/openai.mdx | 163 +++++------ .../docs/unified-docs/plugins/vertex-ai.mdx | 179 ++++++------ src/content/docs/unified-docs/plugins/xai.mdx | 131 ++++----- src/content/docs/unified-docs/rag.mdx | 163 +++++------ .../docs/unified-docs/tool-calling.mdx | 131 ++++----- .../vector-databases/astra-db.mdx | 67 ++--- .../vector-databases/chromadb.mdx | 83 +++--- .../vector-databases/cloud-firestore.mdx | 131 ++++----- .../vector-databases/cloud-sql-postgresql.mdx | 67 ++--- .../dev-local-vectorstore.mdx | 99 +++---- .../unified-docs/vector-databases/lancedb.mdx | 67 ++--- .../unified-docs/vector-databases/neo4j.mdx | 67 ++--- .../vector-databases/pgvector.mdx | 67 ++--- .../vector-databases/pinecone.mdx | 67 ++--- src/scripts/language-preference.js | 157 ----------- src/scripts/language-preference.ts | 149 ++++++++++ 54 files changed, 2988 insertions(+), 2766 deletions(-) create mode 100644 scripts/refactor-tabs.mjs create mode 100644 src/components/LangTabItem.astro create mode 100644 src/components/LangTabs.astro create mode 100644 src/components/LanguagePreferenceSelector.astro delete mode 100644 src/scripts/language-preference.js create mode 100644 src/scripts/language-preference.ts diff --git a/astro.config.mjs b/astro.config.mjs index 8b5ab49a..da26ab45 100644 --- a/astro.config.mjs +++ b/astro.config.mjs @@ -1,14 +1,14 @@ // @ts-check -import { defineConfig } from "astro/config"; -import starlight from "@astrojs/starlight"; -import starlightLinksValidatorPlugin from "starlight-links-validator"; -import starlightLlmsTxt from "starlight-llms-txt"; -import sitemap from "@astrojs/sitemap"; -import { sidebar } from "./src/sidebar"; -import { GOOGLE_DARK_THEME, GOOGLE_LIGHT_THEME } from "./src/google-theme"; +import { defineConfig } from 'astro/config'; +import starlight from '@astrojs/starlight'; +import starlightLinksValidatorPlugin from 'starlight-links-validator'; +import starlightLlmsTxt from 'starlight-llms-txt'; +import sitemap from '@astrojs/sitemap'; +import { sidebar } from './src/sidebar'; +import { GOOGLE_DARK_THEME, GOOGLE_LIGHT_THEME } from './src/google-theme'; -const site = "https://genkit.dev"; -const ogUrl = new URL("ogimage.png?v=1", site).href; +const site = 'https://genkit.dev'; +const ogUrl = new URL('ogimage.png?v=1', site).href; // https://astro.build/config export default defineConfig({ @@ -16,167 +16,156 @@ export default defineConfig({ site, markdown: { shikiConfig: { - langAlias: { dotprompt: "handlebars" }, + langAlias: { dotprompt: 'handlebars' }, }, }, integrations: [ starlight({ - favicon: "favicon.ico", + favicon: 'favicon.ico', expressiveCode: { themes: [GOOGLE_DARK_THEME, GOOGLE_LIGHT_THEME], }, pagination: false, - title: "Genkit", + title: 'Genkit', components: { - Sidebar: "./src/components/sidebar.astro", - Header: "./src/content/custom/header.astro", - Hero: "./src/content/custom/hero.astro", + Sidebar: './src/components/sidebar.astro', + Header: './src/content/custom/header.astro', + Hero: './src/content/custom/hero.astro', Head: './src/content/custom/head.astro', }, head: [ { - tag: "meta", + tag: 'meta', attrs: { - property: "og:image", + property: 'og:image', content: ogUrl, - width: "1085", - height: "377", + width: '1085', + height: '377', }, }, { - tag: "link", + tag: 'link', attrs: { - href: "https://fonts.gstatic.com", - rel: "preconnect", + href: 
'https://fonts.gstatic.com', + rel: 'preconnect', crossorigin: true, }, }, { - tag: "link", + tag: 'link', attrs: { - href: "https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500&display=swap", - rel: "stylesheet", + href: 'https://fonts.googleapis.com/css2?family=Google+Sans:wght@400;500&display=swap', + rel: 'stylesheet', }, }, { - tag: "link", + tag: 'link', attrs: { - href: "https://fonts.googleapis.com/css2?family=Google+Sans+Text:wght@400;500&display=swap", - rel: "stylesheet", + href: 'https://fonts.googleapis.com/css2?family=Google+Sans+Text:wght@400;500&display=swap', + rel: 'stylesheet', }, }, { - tag: "link", + tag: 'link', attrs: { - href: "https://fonts.googleapis.com/css2?family=Google+Sans+Mono:wght@400;500&display=swap", - rel: "stylesheet", + href: 'https://fonts.googleapis.com/css2?family=Google+Sans+Mono:wght@400;500&display=swap', + rel: 'stylesheet', }, }, { - tag: "link", + tag: 'link', attrs: { - href: "https://fonts.googleapis.com/css2?family=Google+Symbols&display=block", - rel: "stylesheet", + href: 'https://fonts.googleapis.com/css2?family=Google+Symbols&display=block', + rel: 'stylesheet', }, }, ], plugins: [ starlightLinksValidatorPlugin(), starlightLlmsTxt({ - projectName: "Genkit", - description: "Open-source GenAI toolkit for JS, Go, and Python.", + projectName: 'Genkit', + description: 'Open-source GenAI toolkit for JS, Go, and Python.', minify: { whitespace: false }, customSets: [ { - label: "Building AI Workflows", - description: - "Guidance on how to generate content and interact with LLM and image models using Genkit.", + label: 'Building AI Workflows', + description: 'Guidance on how to generate content and interact with LLM and image models using Genkit.', paths: [ - "docs/models", - "docs/context", - "docs/flows", - "docs/dotprompt", - "docs/chat", - "docs/tool-calling", - "docs/interrupts", - "docs/rag", - "docs/multi-agent", - "docs/evaluation", - "docs/local-observability", - "docs/errors/types", + 'docs/models', + 'docs/context', + 'docs/flows', + 'docs/dotprompt', + 'docs/chat', + 'docs/tool-calling', + 'docs/interrupts', + 'docs/rag', + 'docs/multi-agent', + 'docs/evaluation', + 'docs/local-observability', + 'docs/errors/types', ], }, { - label: "Deploying AI Workflows", + label: 'Deploying AI Workflows', description: - "Guidance on how to deploy Genkit code to various environments including Firebase and Cloud Run or use within a Next.js app.", - paths: [ - "docs/firebase", - "docs/cloud-run", - "docs/deploy-node", - "docs/auth", - "docs/nextjs", - ], + 'Guidance on how to deploy Genkit code to various environments including Firebase and Cloud Run or use within a Next.js app.', + paths: ['docs/firebase', 'docs/cloud-run', 'docs/deploy-node', 'docs/auth', 'docs/nextjs'], }, { - label: "Observing AI Workflows", - description: - "Guidance about Genkit's various observability features and how to use them.", + label: 'Observing AI Workflows', + description: "Guidance about Genkit's various observability features and how to use them.", paths: [ - "docs/observability/getting-started", - "docs/observability/authentication", - "docs/observability/advanced-configuration", - "docs/observability/telemetry-collection", - "docs/observability/troubleshooting", + 'docs/observability/getting-started', + 'docs/observability/authentication', + 'docs/observability/advanced-configuration', + 'docs/observability/telemetry-collection', + 'docs/observability/troubleshooting', ], }, { - label: "Writing Plugins", - description: "Guidance about how to author 
plugins for Genkit.", - paths: [ - "docs/plugin-authoring", - "docs/plugin-authoring-evaluator", - ], + label: 'Writing Plugins', + description: 'Guidance about how to author plugins for Genkit.', + paths: ['docs/plugin-authoring', 'docs/plugin-authoring-evaluator'], }, { - label: "Plugin Documentation", + label: 'Plugin Documentation', description: - "Provider-specific documentation for the Google AI, Vertex AI, Firebase, Ollama, Chroma, and Pinecone plugins.", + 'Provider-specific documentation for the Google AI, Vertex AI, Firebase, Ollama, Chroma, and Pinecone plugins.', paths: [ - "docs/plugins/google-genai", - "docs/plugins/vertex-ai", - "docs/plugins/firebase", - "docs/plugins/ollama", - "docs/plugins/chroma", - "docs/plugins/pinecone", + 'docs/plugins/google-genai', + 'docs/plugins/vertex-ai', + 'docs/plugins/firebase', + 'docs/plugins/ollama', + 'docs/plugins/chroma', + 'docs/plugins/pinecone', ], }, ], }), ], logo: { - dark: "./src/assets/lockup_white_tight2.png", - light: "./src/assets/lockup_dark_tight.png", + dark: './src/assets/lockup_white_tight2.png', + light: './src/assets/lockup_dark_tight.png', replacesTitle: true, }, social: [ { - icon: "github", - label: "GitHub", - href: "https://github.com/firebase/genkit", + icon: 'github', + label: 'GitHub', + href: 'https://github.com/firebase/genkit', }, { - icon: "discord", - label: "Discord", - href: "https://discord.gg/qXt5zzQKpc", + icon: 'discord', + label: 'Discord', + href: 'https://discord.gg/qXt5zzQKpc', }, ], sidebar, - customCss: ["./src/tailwind.css"], + customCss: ['./src/tailwind.css'], }), sitemap(), ], redirects: { - "/discord": 'https://discord.gg/qXt5zzQKpc', + '/discord': 'https://discord.gg/qXt5zzQKpc', }, }); diff --git a/scripts/refactor-tabs.mjs b/scripts/refactor-tabs.mjs new file mode 100644 index 00000000..5b96576c --- /dev/null +++ b/scripts/refactor-tabs.mjs @@ -0,0 +1,57 @@ +import fs from 'fs/promises'; +import path from 'path'; + +const directoryPath = 'src/content/docs/unified-docs'; + +async function processFile(filePath) { + try { + let content = await fs.readFile(filePath, 'utf-8'); + let changed = false; + + if (content.includes('')) { + changed = true; + + content = content.replace(//g, ''); + content = content.replace(/<\/Tabs>/g, ''); + + content = content.replace(/]*>/g, ''); + content = content.replace(/]*>/g, ''); + content = content.replace(/]*>/g, ''); + content = content.replace(/<\/TabItem>/g, ''); + + const importRegex = /import { Tabs, TabItem } from '@astrojs\/starlight\/components';/g; + const newImport = + "import LangTabs from '../../../components/LangTabs.astro';\nimport LangTabItem from '../../../components/LangTabItem.astro';"; + + if (importRegex.test(content)) { + content = content.replace(importRegex, newImport); + } + } + + if (changed) { + await fs.writeFile(filePath, content, 'utf-8'); + console.log(`Updated: ${filePath}`); + } + } catch (error) { + console.error(`Error processing file ${filePath}:`, error); + } +} + +async function walk(dir) { + const files = await fs.readdir(dir, { withFileTypes: true }); + for (const file of files) { + const res = path.resolve(dir, file.name); + if (file.isDirectory()) { + await walk(res); + } else { + if (res.endsWith('.mdx')) { + await processFile(res); + } + } + } +} + +console.log('Starting tab replacement script...'); +walk(directoryPath).then(() => { + console.log('Tab replacement script finished.'); +}); diff --git a/src/components/LangTabItem.astro b/src/components/LangTabItem.astro new file mode 100644 index 
00000000..cb4949a5 --- /dev/null +++ b/src/components/LangTabItem.astro @@ -0,0 +1,11 @@ +--- +// src/components/LangTabItem.astro +interface Props { + lang: 'js' | 'go' | 'python'; +} + +const { lang } = Astro.props; +--- +
+ +
diff --git a/src/components/LangTabs.astro b/src/components/LangTabs.astro new file mode 100644 index 00000000..4db708c8 --- /dev/null +++ b/src/components/LangTabs.astro @@ -0,0 +1,38 @@ +--- +// src/components/LangTabs.astro +--- +
+ +
+ + diff --git a/src/components/LanguagePreferenceSelector.astro b/src/components/LanguagePreferenceSelector.astro new file mode 100644 index 00000000..83344794 --- /dev/null +++ b/src/components/LanguagePreferenceSelector.astro @@ -0,0 +1,90 @@ +--- +import { Icon } from '@astrojs/starlight/components'; +--- +
+
Language:
+
+ + +
+
+ + diff --git a/src/content/custom/head.astro b/src/content/custom/head.astro index a39be5fd..e00eb0ba 100644 --- a/src/content/custom/head.astro +++ b/src/content/custom/head.astro @@ -19,5 +19,4 @@ if (id.startsWith('docs/')) { {finalTitle} {head.filter(({ tag }) => tag !== 'title').map(({ tag: Tag, attrs, content }) => )} - - + ``` -
- +
+ ```go // App Check verification middleware func appCheckMiddleware() func(http.Handler) http.Handler { @@ -505,8 +506,8 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo firebaseAuthMiddleware(authClient)( genkit.Handler(secureFlow)))) ``` -
- +
+ ```python from firebase_admin import app_check @@ -543,13 +544,13 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo 'app_check_verified': True } ``` -
-
+ + ## Secrets Management - - + + **Set up secrets**: ```bash # Store API key in Firebase Functions secrets @@ -571,8 +572,8 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo myFlow ); ``` - - + + **Use Google Secret Manager**: ```go import ( @@ -613,8 +614,8 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo ) } ``` - - + + **Use Google Secret Manager**: ```python from google.cloud import secretmanager @@ -635,13 +636,13 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo model=google_genai_name('gemini-2.5-flash'), ) ``` - - + + ## Deployment - - + + **Deploy to Firebase Functions**: ```bash # Deploy all functions @@ -666,8 +667,8 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo -H "Content-Type: application/json" \ -d '{"data": {"subject": "mountains"}}' ``` - - + + **Deploy to Cloud Run with Firebase integration**: ```bash # Build and deploy @@ -684,8 +685,8 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo -H "Content-Type: application/json" \ -d '{"subject": "mountains"}' ``` - - + + **Deploy to Cloud Run with Firebase integration**: ```bash # Create requirements.txt @@ -709,15 +710,15 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo -H "Content-Type: application/json" \ -d '{"data": "mountains"}' ``` - - + + ## Client Integration ### Web Client - - + + ```html @@ -783,8 +784,8 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo ``` - - + + ```javascript // Client-side JavaScript for Go backend import { initializeApp } from 'firebase/app'; @@ -817,8 +818,8 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo return response.json(); } ``` - - + + ```javascript // Client-side JavaScript for Python backend import { initializeApp } from 'firebase/app'; @@ -851,13 +852,13 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo return response.json(); } ``` - - + + ## Local Development - - + + **Firebase Emulator Suite**: ```bash # Start emulators with Genkit @@ -874,8 +875,8 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo export GEMINI_API_KEY=your-api-key export GOOGLE_CLOUD_PROJECT=your-project-id ``` - - + + **Local development**: ```bash # Set up environment @@ -886,8 +887,8 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo # Start with Genkit dev UI genkit start -- go run . ``` - - + + **Local development**: ```bash # Set up environment @@ -898,8 +899,8 @@ Firebase Cloud Functions provides a serverless platform for deploying Genkit flo # Start with Genkit dev UI genkit start -- python main.py ``` - - + + ## Best Practices diff --git a/src/content/docs/unified-docs/developer-tools.mdx b/src/content/docs/unified-docs/developer-tools.mdx index 577bdc16..b4530d14 100644 --- a/src/content/docs/unified-docs/developer-tools.mdx +++ b/src/content/docs/unified-docs/developer-tools.mdx @@ -3,7 +3,8 @@ title: Developer Tools description: Explore Genkit's developer tools, including the CLI for command-line operations and the local web-based Developer UI for interactive testing and development across JavaScript, Go, and Python. 
--- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; Genkit provides powerful developer tools that work consistently across JavaScript, Go, and Python: @@ -27,8 +28,8 @@ npm install -g genkit-cli The core pattern is the same across all languages - provide an entrypoint command to run your application: - - + + ```bash # Start the developer UI with your application genkit start -- @@ -41,8 +42,8 @@ The core pattern is the same across all languages - provide an entrypoint comman # Auto-open in browser genkit start -o -- npm run dev ``` - - + + ```bash # Start the developer UI with your Go application genkit start -- go run . @@ -51,8 +52,8 @@ The core pattern is the same across all languages - provide an entrypoint comman # Auto-open in browser genkit start -o -- go run . ``` - - + + ```bash # Start the developer UI with your Python application genkit start -- python app.py @@ -62,8 +63,8 @@ The core pattern is the same across all languages - provide an entrypoint comman # Auto-open in browser genkit start -o -- python app.py ``` - - + + After running the command, you'll see output like: @@ -91,8 +92,8 @@ genkit flow:batchRun ### Evaluation Commands - - + + ```bash # Evaluate a specific flow genkit eval:flow @@ -106,8 +107,8 @@ genkit flow:batchRun # Run evaluation on extracted data genkit eval:run results.json ``` - - + + ```bash # Evaluate a specific flow genkit eval:flow @@ -121,11 +122,11 @@ genkit flow:batchRun # Run evaluation on extracted data genkit eval:run results.json ``` - - + + Evaluation commands are not yet available for Python. Use external evaluation tools or the Developer UI for testing. - - + + ### Configuration Commands @@ -197,8 +198,8 @@ This works the same way across all languages. ### OpenTelemetry Integration - - + + Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/) and provides hooks to export telemetry data to various monitoring systems. Configure telemetry export in your application: @@ -215,8 +216,8 @@ This works the same way across all languages. }, }); ``` - - + + Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/) and provides hooks to export telemetry data. The [Google Cloud plugin](/go/docs/plugins/google-cloud) exports telemetry to Cloud's operations suite. @@ -235,11 +236,11 @@ This works the same way across all languages. // ... } ``` - - + + OpenTelemetry integration is available but may have limited features compared to JavaScript and Go implementations. 
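
  If you need tracing today, one pragmatic option is to wire up the vanilla OpenTelemetry Python SDK yourself. The sketch below is a minimal example, not documented Genkit behavior — the `service.name`, the collector endpoint, and whether your Genkit app routes its spans through this provider are all assumptions:

  ```python
  # Minimal sketch: export traces from a Python app over OTLP.
  # Endpoint and service name are assumptions; adjust for your collector.
  from opentelemetry import trace
  from opentelemetry.sdk.resources import Resource
  from opentelemetry.sdk.trace import TracerProvider
  from opentelemetry.sdk.trace.export import BatchSpanProcessor
  from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter

  # Describe this service in exported traces.
  provider = TracerProvider(resource=Resource.create({"service.name": "my-genkit-app"}))

  # Batch spans and ship them to an OTLP-compatible collector.
  provider.add_span_processor(
      BatchSpanProcessor(OTLPSpanExporter(endpoint="localhost:4317", insecure=True))
  )
  trace.set_tracer_provider(provider)
  ```

  Any OTLP-compatible backend (Jaeger, Grafana Tempo, Cloud Trace via a collector) can then receive the exported spans.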
- - + + ## Analytics and Privacy @@ -271,25 +272,25 @@ genkit config get analyticsOptOut ### Debugging Tips - - + + - Use `console.log()` statements in your flows - they'll appear in the Developer UI - Check the trace inspector for detailed execution information - Use the model runner to isolate model-specific issues - Test prompts independently before integrating them into flows - - + + - Use `log.Printf()` for debugging output - Check traces in the Developer UI for execution details - Test individual components before integrating them - Use the monitoring features to track performance - - + + - Use `print()` statements for debugging - Test components individually in the Developer UI - Monitor application logs for errors and performance issues - - + + ### Common Patterns diff --git a/src/content/docs/unified-docs/dotprompt.mdx b/src/content/docs/unified-docs/dotprompt.mdx index 036fd15e..6191d2b9 100644 --- a/src/content/docs/unified-docs/dotprompt.mdx +++ b/src/content/docs/unified-docs/dotprompt.mdx @@ -3,7 +3,8 @@ title: Managing prompts with Dotprompt description: Learn how to use Dotprompt to manage prompts, models, and parameters for generative AI models across JavaScript and Go, with a streamlined approach to prompt engineering and iteration. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; Prompt engineering is the primary way that you, as an app developer, influence the output of generative AI models. For example, when using LLMs, you can craft @@ -65,17 +66,17 @@ Before reading this page, you should be familiar with the content covered on the If you want to run the code examples on this page, first complete the steps in the Getting started guide for your language: - - + + Complete the [Get started](/docs/get-started) guide. All examples assume you have already installed Genkit as a dependency in your project. - - + + Complete the [Get started](/go/docs/get-started-go) guide. All examples assume you have already installed Genkit as a dependency in your project. - - + + Dotprompt is not currently available for Python. You can define prompts directly in your Python code using the standard prompt patterns shown in the [Generating content](/unified-docs/generating-content) guide. - - + + ## Creating prompt files @@ -91,8 +92,8 @@ project root and automatically loads any prompts it finds there. By default, this directory is named `prompts`. For example, using the default directory name, your project structure might look something like this: - - + + ``` your-project/ ├── lib/ @@ -104,8 +105,8 @@ name, your project structure might look something like this: ├── package.json └── tsconfig.json ``` - - + + ``` your-project/ ├── prompts/ @@ -114,33 +115,33 @@ name, your project structure might look something like this: ├── go.mod └── go.sum ``` - - + + Dotprompt is not currently available for Python. Use standard prompt definition patterns in your Python code. - - + + If you want to use a different directory, you can specify it when you configure Genkit: - - + + ```ts const ai = genkit({ promptDir: './llm_prompts', // (Other settings...) }); ``` - - + + ```go g, err := genkit.Init(ctx.Background(), genkit.WithPromptDir("./llm_prompts")) ``` - - + + Not applicable - Dotprompt is not available for Python. - - + + ### Creating a prompt file @@ -175,8 +176,8 @@ You can also create a prompt file using the model runner in the developer UI. 
Start with application code that imports the Genkit library and configures it to use the model plugin you're interested in: - - + + ```ts import { genkit } from 'genkit'; @@ -200,8 +201,8 @@ use the model plugin you're interested in: ```bash genkit start -- tsx --watch src/your-code.ts ``` - - + + ```go package main @@ -230,11 +231,11 @@ use the model plugin you're interested in: ```bash genkit start -- go run . ``` - - + + Dotprompt is not currently available for Python. Use the standard Genkit Python patterns for prompt definition. - - + + In the Models section, choose the model you want to use from the list of models provided by the plugin. @@ -255,8 +256,8 @@ specify it when you configure Genkit. ### Run prompts from code - - + + To use a prompt, first load it using the `prompt('file_name')` method: ```ts @@ -314,8 +315,8 @@ specify it when you configure Genkit. See [Generate content with AI models](/unified-docs/generating-content) for descriptions of the available options. - - + + To use a prompt, first load it using the `genkit.LookupPrompt()` function: ```go @@ -339,8 +340,8 @@ specify it when you configure Genkit. See [Generate content with AI models](/unified-docs/generating-content) for descriptions of the available options. - - + + Dotprompt is not currently available for Python. Use the standard prompt patterns: ```python @@ -350,8 +351,8 @@ specify it when you configure Genkit. model="googleai/gemini-2.5-flash" ) ``` - - + + ### Using the developer UI @@ -359,25 +360,25 @@ As you're refining your app's prompts, you can run them in the Genkit developer UI to quickly iterate on prompts and model configurations, independently from your application code. - - + + Load the developer UI from your project directory: ```bash genkit start -- tsx --watch src/your-code.ts ``` - - + + Load the developer UI from your project directory: ```bash genkit start -- go run . ``` - - + + Dotprompt is not available for Python, but you can still use the developer UI to test prompts defined in your code. - - + + Once you've loaded prompts into the developer UI, you can run them with different input values, and experiment with how changes to the prompt wording or @@ -406,8 +407,8 @@ config: These values map directly to the configuration parameters: - - + + ```ts const response3 = await helloPrompt( {}, @@ -422,8 +423,8 @@ These values map directly to the configuration parameters: }, ); ``` - - + + ```go resp, err := helloPrompt.Execute(context.Background(), ai.WithConfig(&googlegenai.GeminiConfig{ @@ -434,11 +435,11 @@ These values map directly to the configuration parameters: StopSequences: []string{"", ""}, })) ``` - - + + Not applicable - use standard configuration patterns in Python code. - - + + See [Generate content with AI models](/unified-docs/generating-content) for descriptions of the available options. @@ -470,8 +471,8 @@ These schemas are used in much the same way as those passed to a `generate()` request or a flow definition. For example, the prompt defined above produces structured output: - - + + ```ts const menuPrompt = ai.prompt('menu'); const { output } = await menuPrompt({ theme: 'medieval' }); @@ -479,8 +480,8 @@ structured output: const dishName = output['dishname']; const description = output['description']; ``` - - + + ```go menuPrompt := genkit.LookupPrompt(g, "menu") if menuPrompt == nil { @@ -502,11 +503,11 @@ structured output: log.Println(output["dishname"]) log.Println(output["description"]) ``` - - + + Not applicable - use standard schema patterns in Python code. 
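
  As a rough equivalent in Python, you can pair a Pydantic model with `generate()` to get validated, structured output. This is a sketch that assumes an `output_schema` parameter on the Genkit Python `generate()` call, as in the getting-started examples; the `MenuItemSchema` model is illustrative:

  ```python
  from pydantic import BaseModel

  class MenuItemSchema(BaseModel):
      # Fields mirror the dishname/description schema used above.
      dishname: str
      description: str

  async def describe_menu_item(theme: str):
      # Ask the model for JSON matching the Pydantic model; the response
      # is validated against the schema at runtime (assumption: the
      # parameter is named output_schema in your Genkit Python version).
      response = await ai.generate(
          prompt=f'Invent a menu item for a {theme} themed restaurant.',
          output_schema=MenuItemSchema,
      )
      return response.output
  ```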
- - + + You have several options for defining schemas in a `.prompt` file: Dotprompt's own schema definition format, Picoschema; standard JSON Schema; or, as @@ -541,8 +542,8 @@ schema: The above schema is equivalent to the following type definitions: - - + + ```ts interface Article { title: string; @@ -571,8 +572,8 @@ The above schema is equivalent to the following type definitions: [key: string]: any; } ``` - - + + ```go type Article struct { Title string `json:"title"` @@ -592,11 +593,11 @@ The above schema is equivalent to the following type definitions: Extra any `json:"extra"` // Arbitrary extra data } ``` - - + + Not applicable - use standard schema patterns in Python code. - - + + Picoschema supports scalar types `string`, `integer`, `number`, `boolean`, and `any`. Objects, arrays, and enums are denoted by a parenthetical after the field @@ -628,8 +629,8 @@ output: ### Schema references defined in code - - + + In addition to directly defining schemas in the `.prompt` file, you can reference a schema registered with `defineSchema()` by name. If you're using TypeScript, this approach will let you take advantage of the language's static @@ -677,14 +678,14 @@ output: const dishName = output?.dishname; const description = output?.description; ``` - - + + Schema references in Go are not yet implemented. Use Picoschema or JSON Schema directly in your `.prompt` files. - - + + Not applicable - use standard schema patterns in Python code. - - + + ## Prompt templates @@ -720,14 +721,14 @@ In this example, the Handlebars expression, `{{theme}}`, resolves to the value of the input's `theme` property when you run the prompt. To pass input to the prompt: - - + + ```ts const menuPrompt = ai.prompt('menu'); const { output } = await menuPrompt({ theme: 'medieval' }); ``` - - + + ```go menuPrompt := genkit.LookupPrompt(g, "menu") @@ -735,11 +736,11 @@ prompt. To pass input to the prompt: ai.WithInput(map[string]any{"theme": "medieval"}), ) ``` - - + + Not applicable - use standard templating patterns in Python code. - - + + Note that because the input schema declared the `theme` property to be optional and provided a default, you could have omitted the property, @@ -814,16 +815,16 @@ Describe this image in a detailed paragraph: The URL can be `https:` or base64-encoded `data:` URIs for "inline" image usage. In code, this would be: - - + + ```ts const multimodalPrompt = ai.prompt('multimodal'); const { text } = await multimodalPrompt({ photoUrl: 'https://example.com/photo.jpg', }); ``` - - + + ```go multimodalPrompt := genkit.LookupPrompt(g, "multimodal") @@ -831,11 +832,11 @@ In code, this would be: ai.WithInput(map[string]any{"photoUrl": "https://example.com/photo.jpg"}), ) ``` - - + + Not applicable - use standard multimodal patterns in Python code. - - + + See also [Multimodal input](/unified-docs/generating-content#multimodal-input), on the Generating content page, for an example of constructing a `data:` URL. @@ -909,33 +910,33 @@ Help the user decide between these vacation destinations: You can also define partials in code: - - + + ```ts ai.definePartial('personality', 'Talk like a {{#if style}}{{style}}{{else}}helpful assistant{{/if}}.'); ``` Code-defined partials are available in all prompts. - - + + ```go genkit.DefinePartial(g, "personality", "Talk like a {{#if style}}{{style}}{{else}}helpful assistant{{/if}}.") ``` Code-defined partials are available in all prompts. - - + + Not applicable - use standard templating patterns in Python code. 
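
  A lightweight substitute for partials in Python is to keep shared prompt fragments as ordinary template strings and compose them in a helper. A minimal sketch using only the standard library — the `PERSONALITY` fragment and `greeting_prompt` helper are illustrative, not part of any Genkit API:

  ```python
  from string import Template

  # Shared fragment, analogous to a Dotprompt partial.
  PERSONALITY = Template('Talk like a ${style}.')

  def greeting_prompt(style: str, name: str) -> str:
      # Compose the shared preamble with a prompt-specific body.
      return f'{PERSONALITY.substitute(style=style)}\n\nGreet our new user {name}.'

  print(greeting_prompt('pirate', 'Alex'))
  ```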
- - + + ### Defining Custom Helpers You can define custom helpers to process and manage data inside of a prompt. Helpers are registered globally: - - + + ```ts ai.defineHelper('shout', (text: string) => text.toUpperCase()); ``` @@ -952,8 +953,8 @@ Helpers are registered globally: HELLO, {{shout name}}!!! ``` - - + + ```go genkit.DefineHelper(g, "shout", func(input string) string { return strings.ToUpper(input) @@ -972,11 +973,11 @@ Helpers are registered globally: HELLO, {{shout name}}!!! ``` - - + + Not applicable - use standard templating patterns in Python code. - - + + ## Prompt variants @@ -995,25 +996,25 @@ Pro would perform better, you might create two files: To use a prompt variant: - - + + Specify the variant option when loading: ```ts const myPrompt = ai.prompt('my_prompt', { variant: 'gemini25pro' }); ``` - - + + Specify the variant in the prompt name when loading: ```go myPrompt := genkit.LookupPrompt(g, "my_prompt.gemini25pro") ``` - - + + Not applicable - use standard prompt patterns in Python code. - - + + The name of the variant is included in the metadata of generation traces, so you can compare and contrast actual performance between variants in the Genkit trace @@ -1029,8 +1030,8 @@ its authors consider it to be the best developer experience overall. However, if you have use cases that are not well supported by this setup, you can also define prompts in code: - - + + Use the `definePrompt()` function. The first parameter is analogous to the front matter block of a `.prompt` file; the second parameter can either be a Handlebars template string, as in a prompt file, or a function that returns a `GenerateRequest`: @@ -1067,8 +1068,8 @@ you can also define prompts in code: }, }); ``` - - + + Use the `genkit.DefinePrompt()` function: ```go @@ -1123,8 +1124,8 @@ you can also define prompts in code: Note that all prompt options carry over to `GenerateActionOptions` with the exception of `WithMiddleware()`, which must be passed separately if using `Prompt.Render()` instead of `Prompt.Execute()`. - - + + Define prompts directly in your Python code using standard patterns: ```python @@ -1138,8 +1139,8 @@ you can also define prompts in code: model="googleai/gemini-2.5-flash" ) ``` - - + + ## Next steps diff --git a/src/content/docs/unified-docs/error-handling.mdx b/src/content/docs/unified-docs/error-handling.mdx index e84c4cf9..a9fde76d 100644 --- a/src/content/docs/unified-docs/error-handling.mdx +++ b/src/content/docs/unified-docs/error-handling.mdx @@ -3,43 +3,44 @@ title: Error handling description: Learn about error handling in Genkit, including specialized error types, best practices, and debugging techniques across JavaScript, Go, and Python. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; Proper error handling is crucial for building robust AI applications. Genkit provides different error handling mechanisms and best practices across languages to help you build reliable and secure applications. 
## Availability and Approach - - + + JavaScript provides specialized error types and comprehensive error handling: - `GenkitError` for internal framework errors - `UserFacingError` for application-level errors - Automatic error sanitization in web hosting plugins - Built-in error tracing and debugging - Security-focused error handling - - + + Go uses standard Go error handling patterns: - Standard `error` interface for all errors - Custom error types for specific scenarios - Error wrapping and unwrapping - Context-aware error handling - Structured error information - - + + Python uses standard exception handling: - Built-in exception types - Custom exception classes - Try-catch error handling - Exception chaining and context - Framework-specific error handling - - + + ## Error types and classification - - + + Genkit knows about two specialized types: `GenkitError` and `UserFacingError`. The separation between these two error types helps you better understand where your error is coming from. ### GenkitError @@ -103,8 +104,8 @@ Proper error handling is crucial for building robust AI applications. Genkit pro } }); ``` - - + + Go uses the standard error interface for all error handling. You can create custom error types for specific scenarios: ```go @@ -191,8 +192,8 @@ Proper error handling is crucial for building robust AI applications. Genkit pro log.Printf("Unknown error: %v", err) } ``` - - + + Python uses standard exception handling with custom exception classes: ```python @@ -242,13 +243,13 @@ Proper error handling is crucial for building robust AI applications. Genkit pro except Exception as e: print(f"Unexpected error: {e}") ``` - - + + ## Error handling in flows - - + + Implement comprehensive error handling in your flows: ```typescript @@ -305,8 +306,8 @@ Proper error handling is crucial for building robust AI applications. Genkit pro }); }); ``` - - + + Implement robust error handling with retries and proper error classification: ```go @@ -361,8 +362,8 @@ Proper error handling is crucial for building robust AI applications. Genkit pro return "", fmt.Errorf("operation failed after %d attempts, last error: %w", maxRetries, lastErr) } ``` - - + + Implement comprehensive error handling with retries: ```python @@ -408,13 +409,13 @@ Proper error handling is crucial for building robust AI applications. Genkit pro details={'last_error': str(last_error)} ) ``` - - + + ## Error handling in tools - - + + Tools should handle errors gracefully and provide meaningful feedback: ```typescript @@ -463,8 +464,8 @@ Proper error handling is crucial for building robust AI applications. Genkit pro } }); ``` - - + + Implement proper error handling in tools: ```go @@ -510,8 +511,8 @@ Proper error handling is crucial for building robust AI applications. Genkit pro return strings.Contains(err.Error(), "timeout") } ``` - - + + Implement comprehensive error handling in tools: ```python @@ -555,88 +556,88 @@ Proper error handling is crucial for building robust AI applications. 
Genkit pro message='Database query failed' ) ``` - - + + ## Best practices ### Error classification - - + + - Use `UserFacingError` for errors that can be safely shown to users - Use `GenkitError` for internal framework errors - Let other errors be automatically sanitized by web hosting plugins - Provide meaningful error messages and status codes - Include relevant context in error details - - + + - Create custom error types for different error categories - Use error wrapping to preserve error context - Implement error classification functions - Provide structured error information - Use appropriate logging levels for different error types - - + + - Create custom exception classes for different error types - Use exception chaining to preserve error context - Implement proper exception handling hierarchies - Provide meaningful error messages - Use structured logging for error information - - + + ### Security considerations - - + + - Never expose internal system details in user-facing errors - Use error sanitization in production environments - Log detailed error information for debugging - Implement rate limiting for error-prone operations - Validate all inputs to prevent injection attacks - - + + - Sanitize error messages before returning to clients - Use structured logging to separate internal and external error details - Implement proper input validation - Use context timeouts to prevent resource exhaustion - Implement circuit breakers for external dependencies - - + + - Sanitize exception messages in production - Use different exception types for internal vs external errors - Implement proper input validation and sanitization - Use async timeouts for long-running operations - Implement retry logic with exponential backoff - - + + ### Debugging and monitoring - - + + - Use the Developer UI to inspect error traces - Implement comprehensive logging with error context - Set up error monitoring and alerting - Use error tracking services for production - Include correlation IDs for distributed tracing - - + + - Use structured logging for better error analysis - Implement error metrics and monitoring - Use distributed tracing for complex workflows - Set up alerting for error rate thresholds - Include request IDs for error correlation - - + + - Use structured logging for error analysis - Implement error tracking and monitoring - Use APM tools for error insights - Set up alerting for error patterns - Include trace IDs for error correlation - - + + ## Next steps diff --git a/src/content/docs/unified-docs/evaluation.mdx b/src/content/docs/unified-docs/evaluation.mdx index d620cd83..6962b760 100644 --- a/src/content/docs/unified-docs/evaluation.mdx +++ b/src/content/docs/unified-docs/evaluation.mdx @@ -3,8 +3,9 @@ title: Evaluation description: Learn about Genkit's evaluation capabilities across JavaScript and Go, including inference-based and raw evaluation, dataset creation, and how to use the Developer UI and CLI for testing and analysis. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; -import ThemeImage from '../../../components/ThemeImage.astro'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; +import ThemeImage from '@/components/ThemeImage.astro'; Evaluation is a form of testing that helps you validate your LLM's responses and ensure they meet your quality bar. @@ -46,22 +47,22 @@ This section explains how to perform inference-based evaluation using Genkit. 1. 
Use an existing Genkit app or create a new one by following the Getting started guide for your language: - - + + Follow the [Get started](/docs/get-started) guide. - - + + Follow the [Get started](/go/docs/get-started-go) guide. - - + + Evaluation features are not yet available for Python. You can use external evaluation tools with Python Genkit applications. - - + + 2. Add the following code to define a simple RAG application to evaluate. For this guide, we use a dummy retriever that always returns the same documents. - - + + ```js import { genkit, z, Document } from 'genkit'; import { googleAI } from '@genkit-ai/googleai'; @@ -103,8 +104,8 @@ This section explains how to perform inference-based evaluation using Genkit. }, ); ``` - - + + ```go package main @@ -168,16 +169,16 @@ This section explains how to perform inference-based evaluation using Genkit. }) } ``` - - + + Evaluation features are not yet available for Python. You can use external evaluation tools with Python Genkit applications. - - + + 3. (Optional) Add evaluation metrics to your application to use while evaluating: - - + + This guide uses the `MALICIOUSNESS` metric from the `genkitEval` plugin. ```js @@ -201,8 +202,8 @@ This section explains how to perform inference-based evaluation using Genkit. ```bash npm install @genkit-ai/evaluator ``` - - + + This guide uses the `EvaluatorRegex` metric from the `evaluators` package. ```go @@ -235,29 +236,29 @@ This section explains how to perform inference-based evaluation using Genkit. ```bash go get github.com/firebase/genkit/go/plugins/evaluators ``` - - + + Not applicable - evaluation features are not yet available for Python. - - + + 4. Start your Genkit application: - - + + ```bash genkit start -- ``` - - + + ```bash genkit start -- go run main.go ``` - - + + Not applicable - evaluation features are not yet available for Python. - - + + ### Create a dataset @@ -281,8 +282,8 @@ Create a dataset to define the examples we want to use for evaluating our flow. b. Only the `input` field is required. Add the input data: - - + + Enter `{"query": "Who is man's best friend?"}` in the `input` field, and click **Save** to add the example to your dataset. Repeat this process to add more examples: @@ -291,8 +292,8 @@ Create a dataset to define the examples we want to use for evaluating our flow. {"query": "Can I give milk to my cats?"} {"query": "From which animals did dogs evolve?"} ``` - - + + Enter `"Who is man's best friend?"` in the `Input` field, and click **Save** to add the example to your dataset. If you have configured the `EvaluatorRegex` metric and would like to try it out, you need to specify a Reference string that contains the pattern to match the output against. For the preceding input, set the `Reference output` text to `"(?i)dog"`, which is a case-insensitive regular-expression pattern to match the word "dog" in the flow output. @@ -310,11 +311,11 @@ Create a dataset to define the examples we want to use for evaluating our flow. "(?i)don't know" "(?i)wolf|wolves" ``` - - + + Not applicable - evaluation features are not yet available for Python. - - + + By the end of this step, your dataset should have 3 examples in it, with the values mentioned above. @@ -380,43 +381,43 @@ running an evaluation. 
Genkit includes built-in evaluators to help you get started: - - + + Genkit includes a small number of native evaluators, inspired by [RAGAS](https://docs.ragas.io/en/stable/): - **Faithfulness** -- Measures the factual consistency of the generated answer against the given context - **Answer Relevancy** -- Assesses how pertinent the generated answer is to the given prompt - **Maliciousness** -- Measures whether the generated output intends to deceive, harm, or exploit - - + + Genkit includes a small number of built-in evaluators, ported from the [JS evaluators plugin](https://js.api.genkit.dev/enums/_genkit-ai_evaluator.GenkitMetric.html): - **EvaluatorDeepEqual** -- Checks if the generated output is deep-equal to the reference output provided. - **EvaluatorRegex** -- Checks if the generated output matches the regular expression provided in the reference field. - **EvaluatorJsonata** -- Checks if the generated output matches the [JSONATA](https://jsonata.org/) expression provided in the reference field. - - + + Evaluation features are not yet available for Python. - - + + ### Evaluator plugins Genkit supports additional evaluators through plugins: - - + + - [Vertex Rapid Evaluators](/docs/plugins/vertex-ai#evaluators) via the VertexAI Plugin - Custom evaluators through the plugin system - - + + - Custom evaluators through the plugin system - Third-party evaluation tools through plugins - - + + Not applicable - evaluation features are not yet available for Python. - - + + ## Advanced use @@ -519,27 +520,27 @@ genkit eval:flow qaFlow --input testInputs.json Note: Make sure that you start your genkit app before running these CLI commands. - - + + ```bash genkit start -- ``` - - + + ```bash genkit start -- go run main.go ``` - - + + Not applicable - evaluation features are not yet available for Python. - - + + Here, `testInputs.json` should be an array of objects containing an `input` field and an optional `reference` field: - - + + ```json [ { @@ -551,8 +552,8 @@ field and an optional `reference` field: } ] ``` - - + + ```json [ { @@ -564,11 +565,11 @@ field and an optional `reference` field: } ] ``` - - + + Not applicable - evaluation features are not yet available for Python. - - + + If your flow requires auth, you may specify it using the `--context` argument: @@ -580,21 +581,21 @@ By default, the `eval:flow` and `eval:run` commands use all available metrics for evaluation. To run on a subset of the configured evaluators, use the `--evaluators` flag and provide a comma-separated list of evaluators by name: - - + + ```bash genkit eval:flow qaFlow --input testInputs.json --evaluators=genkitEval/maliciousness,genkitEval/answer_relevancy ``` - - + + ```bash genkit eval:flow qaFlow --input testInputs.json --evaluators=genkitEval/regex,genkitEval/jsonata ``` - - + + Not applicable - evaluation features are not yet available for Python. - - + + You can view the results of your evaluation run in the Dev UI at `localhost:4000/evaluate`. @@ -613,39 +614,39 @@ inference. Run your flow over your test inputs: - - + + ```bash genkit flow:batchRun qaFlow testInputs.json --label firstRunSimple ``` - - + + ```bash genkit flow:batchRun qaFlow testInputs.json ``` - - + + Not applicable - evaluation features are not yet available for Python. 
- - + + Extract the evaluation data: - - + + ```bash genkit eval:extractData qaFlow --label firstRunSimple --output factsEvalDataset.json ``` - - + + ```bash genkit eval:extractData qaFlow --maxRows 2 --output factsEvalDataset.json ``` - - + + Not applicable - evaluation features are not yet available for Python. - - + + The exported data has a format different from the dataset format presented earlier. This is because this data is intended to be used with evaluation @@ -676,8 +677,8 @@ UI, located at `localhost:4000/evaluate`. ### Batching evaluations - - + + :::note This feature is only available in the Node.js SDK. ::: @@ -697,19 +698,19 @@ UI, located at `localhost:4000/evaluate`. ``` Batching is also available in the Dev UI for Genkit (JS) applications. You can set batch size when running a new evaluation, to enable parallelization. - - + + Batching features are not yet available for Go. Evaluations run sequentially. - - + + Not applicable - evaluation features are not yet available for Python. - - + + ### Custom extractors - - + + Genkit provides reasonable default logic for extracting the necessary fields (`input`, `output` and `context`) while doing an evaluation. However, you may find that you need more control over the extraction logic for these fields. @@ -811,19 +812,19 @@ UI, located at `localhost:4000/evaluate`. to the extractor. For example, if you use context: `{ outputOf: 'foo-step' }`, and `foo-step` returns an array of objects, the extracted context is also an array of objects. - - + + Custom extractors are not yet available for Go. Use the default extraction logic provided by Genkit. - - + + Not applicable - evaluation features are not yet available for Python. - - + + ### Synthesizing test data using an LLM - - + + Here is an example flow that uses a PDF file to generate potential user questions. @@ -892,14 +893,14 @@ UI, located at `localhost:4000/evaluate`. ```bash genkit flow:run synthesizeQuestions '{"filePath": "my_input.pdf"}' --output synthesizedQuestions.json ``` - - + + Test data synthesis features are not yet available for Go. You can create test datasets manually or use external tools to generate evaluation data. - - + + Not applicable - evaluation features are not yet available for Python. - - + + ## Next steps diff --git a/src/content/docs/unified-docs/frameworks/express.mdx b/src/content/docs/unified-docs/frameworks/express.mdx index d099b881..e01b43bd 100644 --- a/src/content/docs/unified-docs/frameworks/express.mdx +++ b/src/content/docs/unified-docs/frameworks/express.mdx @@ -3,7 +3,8 @@ title: Express.js Integration description: Learn how to integrate Genkit with Express.js applications across JavaScript, Go, and Python, including REST API endpoints, authentication, and deployment strategies. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; import { Badge } from '@astrojs/starlight/components'; @@ -18,8 +19,8 @@ Express.js integration is primarily available for JavaScript/Node.js. For other ## Installation and Setup - - + + Install the Express plugin: ```bash @@ -67,8 +68,8 @@ Express.js integration is primarily available for JavaScript/Node.js. For other console.log('Express server listening on port 8080'); }); ``` - - + + For Go applications, use Gin or standard HTTP handlers. Here's an equivalent setup: ```go @@ -136,8 +137,8 @@ Express.js integration is primarily available for JavaScript/Node.js. 
For other r.Run(":8080") } ``` - - + + For Python applications, use Flask or FastAPI. Here's a Flask equivalent: ```python @@ -173,13 +174,13 @@ Express.js integration is primarily available for JavaScript/Node.js. For other if __name__ == '__main__': app.run(host='0.0.0.0', port=8080) ``` - - + + ## Client Integration - - + + Access your Express-hosted flows from client applications: ```ts @@ -250,8 +251,8 @@ Express.js integration is primarily available for JavaScript/Node.js. For other ); } ``` - - + + Access your Go HTTP endpoints from client applications: ```go @@ -307,8 +308,8 @@ Express.js integration is primarily available for JavaScript/Node.js. For other fmt.Printf("Response: %s\n", result.Output) } ``` - - + + Access your Python Flask endpoints from client applications: ```python @@ -346,13 +347,13 @@ Express.js integration is primarily available for JavaScript/Node.js. For other asyncio.run(main()) ``` - - + + ## Authentication and Security - - + + Implement authentication for your Express endpoints: ### API Key Authentication @@ -459,8 +460,8 @@ Express.js integration is primarily available for JavaScript/Node.js. For other // Protected endpoint app.post('/protectedFlow', authMiddleware, expressHandler(protectedFlow)); ``` - - + + Implement authentication in Go applications: ```go @@ -513,8 +514,8 @@ Express.js integration is primarily available for JavaScript/Node.js. For other r.Run(":8080") } ``` - - + + Implement authentication in Flask applications: ```python @@ -552,15 +553,15 @@ Express.js integration is primarily available for JavaScript/Node.js. For other result = await handle_protected_flow(input_data, user) return jsonify(result) ``` - - + + ## Advanced Features ### Multiple Flows and Server Configuration - - + + Use `startFlowServer` for multiple flows with advanced configuration: ```ts @@ -607,8 +608,8 @@ Express.js integration is primarily available for JavaScript/Node.js. For other }, }); ``` - - + + Configure multiple endpoints with Gin: ```go @@ -644,8 +645,8 @@ Express.js integration is primarily available for JavaScript/Node.js. For other r.Run(":4567") } ``` - - + + Configure multiple endpoints with Flask: ```python @@ -670,13 +671,13 @@ Express.js integration is primarily available for JavaScript/Node.js. For other if __name__ == '__main__': app.run(host='0.0.0.0', port=4567) ``` - - + + ### Error Handling and Validation - - + + Implement comprehensive error handling: ```ts @@ -728,8 +729,8 @@ Express.js integration is primarily available for JavaScript/Node.js. For other }); }); ``` - - + + Implement error handling in Go: ```go @@ -774,8 +775,8 @@ Express.js integration is primarily available for JavaScript/Node.js. For other c.JSON(200, gin.H{"result": result}) } ``` - - + + Implement error handling in Flask: ```python @@ -821,15 +822,15 @@ Express.js integration is primarily available for JavaScript/Node.js. For other 'code': 'INTERNAL' }), 500 ``` - - + + ## Deployment Considerations ### Production Configuration - - + + Production-ready Express setup: ```ts @@ -865,8 +866,8 @@ Express.js integration is primarily available for JavaScript/Node.js. For other console.log(`Server running on port ${port}`); }); ``` - - + + Production-ready Go setup: ```go @@ -918,8 +919,8 @@ Express.js integration is primarily available for JavaScript/Node.js. For other return r } ``` - - + + Production-ready Flask setup: ```python @@ -956,8 +957,8 @@ Express.js integration is primarily available for JavaScript/Node.js. 
For other port = int(os.environ.get('PORT', 8080)) app.run(host='0.0.0.0', port=port, debug=False) ``` - - + + ## Next Steps diff --git a/src/content/docs/unified-docs/frameworks/nextjs.mdx b/src/content/docs/unified-docs/frameworks/nextjs.mdx index 7bca3624..fbd21837 100644 --- a/src/content/docs/unified-docs/frameworks/nextjs.mdx +++ b/src/content/docs/unified-docs/frameworks/nextjs.mdx @@ -3,7 +3,8 @@ title: Next.js Integration description: Learn how to integrate Genkit with Next.js applications across JavaScript, Go, and Python, including API routes, client-side calls, streaming, and deployment strategies. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; import { Badge } from '@astrojs/starlight/components'; @@ -18,8 +19,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan ## Installation and Setup - - + + ### Create a Next.js Project If you don't have an existing Next.js project: @@ -60,8 +61,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan ├── package.json └── next.config.js ``` - - + + For Go applications, create a separate backend API and React frontend: ### Backend Setup (Go) @@ -117,8 +118,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan npx create-next-app@latest . --typescript npm install ``` - - + + For Python applications, create a FastAPI backend with React frontend: ### Backend Setup (Python) @@ -169,13 +170,13 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan npx create-next-app@latest . --typescript npm install ``` - - + + ## Define Genkit Flows - - + + Create your Genkit flows in `src/genkit/flows.ts`: ```ts @@ -240,8 +241,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan } ); ``` - - + + Define flows in your Go backend: ```go @@ -321,8 +322,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan }) } ``` - - + + Define flows in your Python backend: ```python @@ -372,13 +373,13 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan response = await ai.generate(context) return ChatOutput(response=response.text) ``` - - + + ## Create API Routes - - + + Create API routes using the Genkit Next.js plugin: ### Individual Route Files @@ -427,8 +428,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan return appRoute(flow)(request); } ``` - - + + Set up API routes in your Gin router: ```go @@ -458,8 +459,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan return r } ``` - - + + Set up API routes in FastAPI: ```python @@ -478,13 +479,13 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan async def health_check(): return {"status": "healthy"} ``` - - + + ## Frontend Implementation - - + + Create your React components with Genkit integration: ### Basic Usage @@ -708,8 +709,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan ); } ``` - - + + Create React components that call your Go backend: ```tsx @@ -789,8 +790,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan ); } ``` - - + + Create React components that call your Python backend: ```tsx @@ -870,13 +871,13 @@ Next.js integration is primarily available for JavaScript/Node.js. 
For other lan ); } ``` - - + + ## Authentication and Security - - + + ### API Key Authentication ```tsx @@ -947,8 +948,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan return NextResponse.json({ error: 'Invalid credentials' }, { status: 401 }); } ``` - - + + Implement JWT authentication in your Go backend: ```go @@ -985,8 +986,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan // Apply to protected routes api.POST("/menuSuggestion", authMiddleware(), handleMenuSuggestion) ``` - - + + Implement JWT authentication in your FastAPI backend: ```python @@ -1018,15 +1019,15 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan result = await menu_suggestion_flow(input_data) return result ``` - - + + ## Deployment Considerations ### Environment Variables - - + + Configure environment variables for production: ```bash @@ -1069,8 +1070,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan CMD ["npm", "start"] ``` - - + + Deploy your Go backend: ```dockerfile @@ -1102,8 +1103,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan gcloud builds submit --tag gcr.io/PROJECT_ID/genkit-backend gcloud run deploy --image gcr.io/PROJECT_ID/genkit-backend --platform managed ``` - - + + Deploy your Python backend: ```dockerfile @@ -1129,8 +1130,8 @@ Next.js integration is primarily available for JavaScript/Node.js. For other lan gcloud builds submit --tag gcr.io/PROJECT_ID/genkit-backend gcloud run deploy --image gcr.io/PROJECT_ID/genkit-backend --platform managed ``` - - + + ## Next Steps diff --git a/src/content/docs/unified-docs/generating-content.mdx b/src/content/docs/unified-docs/generating-content.mdx index 1a3547c8..e967a600 100644 --- a/src/content/docs/unified-docs/generating-content.mdx +++ b/src/content/docs/unified-docs/generating-content.mdx @@ -3,7 +3,8 @@ title: Generating content with AI models description: Learn how to generate content with AI models using Genkit's unified interface across JavaScript, Go, and Python, covering basic usage, configuration, structured output, streaming, and multimodal input/output. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; import LLMSummary from '@/components/llm-summary.astro'; import ExampleLink from '@/components/ExampleLink.astro'; @@ -14,8 +15,8 @@ Genkit provides a unified interface to interact with various generative AI model **Basic Usage:** - - + + ```typescript import { googleAI } from '@genkit-ai/googleai'; import { genkit } from 'genkit'; @@ -43,8 +44,8 @@ Genkit provides a unified interface to interact with various generative AI model }); console.log(response3.text); ``` - - + + ```go import ( "context" @@ -73,8 +74,8 @@ Genkit provides a unified interface to interact with various generative AI model log.Println(resp.Text()) } ``` - - + + ```python import asyncio from genkit.ai import Genkit @@ -93,8 +94,8 @@ Genkit provides a unified interface to interact with various generative AI model ai.run_main(main()) ``` - - + + **Configuration:** @@ -158,17 +159,17 @@ If you want to run the code examples on this page, first complete the steps in the Getting started guide for your language. All of the examples assume that you have already installed Genkit as a dependency in your project. - - + + Complete the [Getting started](/docs/get-started) guide. 
- - + + Complete the [Get started](/go/docs/get-started-go) guide. - - + + Complete the [Get started](/python/docs/get-started) guide. - - + + ### Models supported by Genkit @@ -218,8 +219,8 @@ models is the `generate()` method. The simplest `generate()` call specifies the model you want to use and a text prompt: - - + + ```ts import { googleAI } from '@genkit-ai/googleai'; import { genkit } from 'genkit'; @@ -237,8 +238,8 @@ prompt: run(); ``` - - + + ```go package main @@ -272,8 +273,8 @@ prompt: log.Println(resp.Text()) } ``` - - + + ```python import asyncio from genkit.ai import Genkit @@ -292,8 +293,8 @@ prompt: ai.run_main(main()) ``` - - + + When you run this brief example, it will print out some debugging information followed by the output of the `generate()` call, which will usually be Markdown @@ -321,8 +322,8 @@ which you specified when you configured the Genkit instance. You can also specify a model for a single `generate()` call: - - + + ```ts import { googleAI } from '@genkit-ai/googleai'; @@ -340,24 +341,24 @@ You can also specify a model for a single `generate()` call: prompt: 'Invent a menu item for a restaurant with a pirate theme.', }); ``` - - + + ```go resp, err := genkit.Generate(ctx, g, ai.WithModelName("googleai/gemini-2.5-pro"), ai.WithPrompt("Invent a menu item for a pirate themed restaurant."), ) ``` - - + + ```python result = await ai.generate( prompt='Invent a menu item for a pirate themed restaurant.', model='googleai/gemini-2.0-pro', ) ``` - - + + A model string identifier looks like `providerid/modelid`, where the provider ID (in this case, `googleai`) identifies the plugin, and the model ID is a @@ -383,16 +384,16 @@ of its responses, the format of its responses, and so on. If the model you're using supports system prompts, you can provide one: - - + + ```ts const response = await ai.generate({ prompt: 'What is your quest?', system: "You are a knight from Monty Python's Flying Circus.", }); ``` - - + + ```go resp, err := genkit.Generate(ctx, g, ai.WithSystem("You are a food industry marketing consultant."), @@ -402,23 +403,23 @@ If the model you're using supports system prompts, you can provide one: For models that don't support system prompts, `ai.WithSystem()` simulates it by modifying the request to appear _like_ a system prompt. - - + + ```python result = await ai.generate( system='You are a food industry marketing consultant.', prompt='Invent a menu item for a pirate themed restaurant.', ) ``` - - + + ### Multi-turn conversations with messages For multi-turn conversations, you can use the `messages` parameter instead of `prompt` to provide a conversation history. This is particularly useful when you need to maintain context across multiple interactions with the model. 
- - + + The `messages` parameter accepts an array of message objects, where each message has a `role` (one of `'system'`, `'user'`, `'model'`, or `'tool'`) and `content`: ```ts @@ -446,8 +447,8 @@ For multi-turn conversations, you can use the `messages` parameter instead of `p - Use the `messages` parameter for simple multi-turn conversations where you manually manage the conversation history - For persistent chat sessions with automatic history management, use the [Chat API](/docs/chat) instead - - + + ```go resp, err := genkit.Generate(ctx, g, ai.WithModelName("googleai/gemini-2.5-flash"), @@ -464,8 +465,8 @@ For multi-turn conversations, you can use the `messages` parameter instead of `p ), ) ``` - - + + ```python # Multi-turn conversation support varies by Python implementation # Check the specific plugin documentation for message handling @@ -473,16 +474,16 @@ For multi-turn conversations, you can use the `messages` parameter instead of `p prompt='Continue our conversation about trip planning to Japan.', ) ``` - - + + ### Model parameters The `generate()` function takes a `config` parameter, through which you can specify optional settings that control how the model generates content: - - + + ```ts const response = await ai.generate({ prompt: 'Invent a menu item for a restaurant with a pirate theme.', @@ -495,8 +496,8 @@ specify optional settings that control how the model generates content: }, }); ``` - - + + ```go resp, err := genkit.Generate(ctx, g, ai.WithModelName("googleai/gemini-2.5-flash"), @@ -510,8 +511,8 @@ specify optional settings that control how the model generates content: }), ) ``` - - + + ```python result = await ai.generate( prompt='Invent a menu item for a pirate themed restaurant.', @@ -524,8 +525,8 @@ specify optional settings that control how the model generates content: }, ) ``` - - + + The exact parameters that are supported depend on the individual model and model API. However, the parameters in the previous example are common to almost every @@ -638,8 +639,8 @@ or feeding the output of one model into another, structured output is a must. In Genkit, you can request structured output from a model by specifying a schema when you call `generate()`: - - + + ```ts import { z } from 'genkit'; @@ -660,8 +661,8 @@ when you call `generate()`: library. In addition to a schema definition language, Zod also provides runtime type checking, which bridges the gap between static TypeScript types and the unpredictable output of generative AI models. - - + + ```go type MenuItem struct { Name string `json:"name"` @@ -683,8 +684,8 @@ when you call `generate()`: [`invopop/jsonschema`](https://github.com/invopop/jsonschema) package. This provides runtime type checking, which bridges the gap between static Go types and the unpredictable output of generative AI models. - - + + ```python from pydantic import BaseModel @@ -703,8 +704,8 @@ when you call `generate()`: Model output schemas are specified using [Pydantic Models](https://docs.pydantic.dev/latest/concepts/models/). In addition to a schema definition language, Pydantic also provides runtime type checking, which bridges the gap between static Python types and the unpredictable output of generative AI models. 
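
  That runtime checking also works outside the `generate()` call. If you need to re-validate structured data yourself — for example, model output that was persisted and reloaded — the same model can do it. A small sketch, where the `MenuItem` fields are assumptions standing in for your actual schema:

  ```python
  from pydantic import BaseModel, ValidationError

  class MenuItem(BaseModel):
      name: str
      description: str

  raw = {'name': "Kraken's Feast", 'description': 'Grilled octopus with lemon.'}

  try:
      # model_validate() applies the same runtime checks used on model output.
      item = MenuItem.model_validate(raw)
      print(item.name)
  except ValidationError as err:
      # A schema mismatch is surfaced instead of propagating bad data.
      print(f'Output did not match the schema: {err}')
  ```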
- - + + When you specify a schema in `generate()`, Genkit does several things behind the scenes: @@ -719,8 +720,8 @@ scenes: To get structured output from a successful generate call, use the response object's `output` property: - - + + ```ts const menuItem = response.output; // Typed as z.infer console.log(menuItem?.name); @@ -728,8 +729,8 @@ object's `output` property: Note that the `output` property can be `null`. This can happen when the model fails to generate output that conforms to the schema. - - + + ```go var item MenuItem if err := resp.Output(&item); err != nil { @@ -750,13 +751,13 @@ object's `output` property: log.Fatal(err) } ``` - - + + ```python output = response.output ``` - - + + #### Handling errors @@ -791,8 +792,8 @@ illusion of chatting with an intelligent counterpart. In Genkit, you can stream output using the streaming methods: - - + + ```ts const { stream, response } = ai.generateStream({ prompt: 'Tell a story.', @@ -823,8 +824,8 @@ In Genkit, you can stream output using the streaming methods: const finalResponse = await response; console.log(finalResponse.output); ``` - - + + ```go resp, err := genkit.Generate(ctx, g, ai.WithPrompt("Suggest a complete menu for a pirate themed restaurant."), @@ -840,8 +841,8 @@ In Genkit, you can stream output using the streaming methods: log.Println(resp.Text()) ``` - - + + ```python stream, response = ai.generate_stream( prompt='Suggest a complete menu for a pirate themed restaurant.', @@ -868,8 +869,8 @@ In Genkit, you can stream output using the streaming methods: print((await response).output) ``` - - + + Streaming structured output works a little differently from streaming text: the `output` property of a response chunk is an object constructed from the @@ -899,8 +900,8 @@ To provide a media prompt to a model that supports it, instead of passing a simple text prompt to `generate`, pass an array consisting of a media part and a text part: - - + + ```ts const response = await ai.generate({ prompt: [{ media: { url: 'https://.../image.jpg' } }, { text: 'What is in this image?' }], @@ -917,8 +918,8 @@ text part: prompt: [{ media: { url: `data:image/jpeg;base64,${data.toString('base64')}` } }, { text: 'What is in this image?' }], }); ``` - - + + ```go resp, err := genkit.Generate(ctx, g, ai.WithModelName("googleai/gemini-2.5-flash"), @@ -949,8 +950,8 @@ text part: ), ) ``` - - + + ```python from genkit.ai import Part @@ -981,8 +982,8 @@ text part: ], ) ``` - - + + All models that support media input support both data URLs and HTTPS URLs. Some model plugins add support for other media sources. For example, the Vertex AI @@ -1000,8 +1001,8 @@ Genkit returns generated media as a **data URL**, a widely supported format for To generate an image, you can use models that support image generation. Here's an example using Google AI's image generation capabilities: - - + + ```ts import { googleAI } from '@genkit-ai/googleai'; import { parseDataUrl } from 'data-urls'; @@ -1021,20 +1022,20 @@ To generate an image, you can use models that support image generation. 
Here's a } } ``` - - + + ```go // Image generation support varies by Go implementation // Check the specific plugin documentation for media generation ``` - - + + ```python # Image generation support varies by Python implementation # Check the specific plugin documentation for media generation ``` - - + + ### Next steps diff --git a/src/content/docs/unified-docs/get-started.mdx b/src/content/docs/unified-docs/get-started.mdx index 59ddc951..7f4c6944 100644 --- a/src/content/docs/unified-docs/get-started.mdx +++ b/src/content/docs/unified-docs/get-started.mdx @@ -3,8 +3,9 @@ title: Get started with Genkit description: Learn how to get started with Genkit across JavaScript, Go, and Python, including project setup, installing packages, configuring API keys, creating your first flow, and testing in the Developer UI. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; import { LinkButton } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; This guide shows you how to get started with Genkit in your preferred language and test it in the Developer UI. @@ -12,32 +13,32 @@ This guide shows you how to get started with Genkit in your preferred language a Before you begin, make sure your environment meets these requirements: - - + + - Node.js v20 or later - npm This guide assumes you're already familiar with building Node.js applications. - - + + - Go 1.24 or later ([Download and install](https://go.dev/doc/install)) This guide assumes you're already familiar with building Go applications. - - + + - Python 3.10 or later ([Download and install](https://www.python.org/downloads/)) - Node.js 20 or later (for the Genkit CLI and UI) :::note[Alpha Release] The Genkit libraries for Python are currently in **Alpha**. You might see API and functional changes as development progresses. We recommend using it only for prototyping and exploration. ::: - - + + ## Set up your project - - + + Create a new Node.js project and configure TypeScript: ```sh @@ -55,8 +56,8 @@ Before you begin, make sure your environment meets these requirements: ``` This sets up your project structure and a TypeScript entry point at `src/index.ts`. - - + + Initialize a new Go project directory: ```bash @@ -66,8 +67,8 @@ Before you begin, make sure your environment meets these requirements: ``` Create a `main.go` file for your application entry point. - - + + Create a new project directory and set up a virtual environment: ```bash @@ -85,13 +86,13 @@ Before you begin, make sure your environment meets these requirements: ```bash source bin/activate # for bash ``` - - + + ## Install Genkit packages - - + + First, install the Genkit CLI globally. This gives you access to local developer tools, including the Developer UI: ```bash @@ -106,8 +107,8 @@ Before you begin, make sure your environment meets these requirements: - `genkit` provides Genkit core capabilities. - `@genkit-ai/googleai` provides access to the Google AI Gemini models. - - + + Install the Genkit package for Go: ```bash @@ -115,8 +116,8 @@ Before you begin, make sure your environment meets these requirements: ``` This provides Genkit core capabilities and access to Google AI Gemini models. 
- - + + Install the required Python packages: ```bash @@ -136,8 +137,8 @@ Before you begin, make sure your environment meets these requirements: ```bash pip3 install -r requirements.txt ``` - - + + ## Configure your model API key @@ -164,8 +165,8 @@ Genkit also supports models from Vertex AI, Anthropic, OpenAI, Cohere, Ollama, a ## Create your first application - - + + A flow is a special Genkit function with built-in observability, type safety, and tooling integration. Update `src/index.ts` with the following: @@ -251,8 +252,8 @@ Genkit also supports models from Vertex AI, Anthropic, OpenAI, Cohere, Ollama, a - Integrates with the Developer UI - Easy deployment as APIs - Built-in tracing and observability - - + + Create a `main.go` file with the following sample code: ```go @@ -296,8 +297,8 @@ Genkit also supports models from Vertex AI, Anthropic, OpenAI, Cohere, Ollama, a - Prints the model's response For more advanced examples with flows and structured output, see [creating flows](/unified-docs/creating-flows). - - + + Create a `main.py` file: ```python title="main.py" @@ -336,13 +337,13 @@ Genkit also supports models from Vertex AI, Anthropic, OpenAI, Cohere, Ollama, a - Defines a structured output schema using Pydantic - Creates a flow to generate RPG characters - Runs the flow and prints the structured result - - + + ## Run your application - - + + Run your application to see it in action: ```bash @@ -350,8 +351,8 @@ Genkit also supports models from Vertex AI, Anthropic, OpenAI, Cohere, Ollama, a ``` You should see a structured recipe output in your console. - - + + Run the app to see the model response: ```bash @@ -361,8 +362,8 @@ Genkit also supports models from Vertex AI, Anthropic, OpenAI, Cohere, Ollama, a # personal question. Many find meaning through connection, growth, # contribution, happiness, or discovering their own purpose. ``` - - + + Run your app (Genkit apps are just regular Python applications): ```bash @@ -370,8 +371,8 @@ Genkit also supports models from Vertex AI, Anthropic, OpenAI, Cohere, Ollama, a ``` You should see a structured RPG character output in JSON format. - - + + ## Test in the Developer UI @@ -379,11 +380,11 @@ The **Developer UI** is a local tool for testing and inspecting Genkit component ### Install the Genkit CLI (if needed) - - + + If you followed the installation steps above, you already have the Genkit CLI installed. - - + + Install the Genkit CLI using npm: ```bash @@ -391,8 +392,8 @@ The **Developer UI** is a local tool for testing and inspecting Genkit component ``` This requires Node.js to be installed on your system. - - + + If you don't already have Node 20 or newer on your system, install it now. **Recommendation**: The [`nvm`](https://github.com/nvm-sh/nvm) and [`nvm-windows`](https://github.com/coreybutler/nvm-windows) tools are a convenient way to install specific versions of Node. @@ -416,13 +417,13 @@ The **Developer UI** is a local tool for testing and inspecting Genkit component ```bash npm install -g genkit-cli ``` - - + + ### Start the Developer UI - - + + Run the following command from your project root: ```bash @@ -450,8 +451,8 @@ The **Developer UI** is a local tool for testing and inspecting Genkit component ```sh npm run genkit:ui ``` - - + + Run the following command from your project root: ```bash @@ -459,8 +460,8 @@ The **Developer UI** is a local tool for testing and inspecting Genkit component ``` This starts your app and launches the Developer UI at `http://localhost:4000` by default. 
- - + + To inspect your app with Genkit Dev UI, run: ```bash @@ -472,13 +473,13 @@ The **Developer UI** is a local tool for testing and inspecting Genkit component ``` Genkit Developer UI: http://localhost:4000 ``` - - + + ### Run and inspect flows - - + + In the Developer UI: 1. Select the `recipeGeneratorFlow` from the list of flows @@ -504,8 +505,8 @@ The **Developer UI** is a local tool for testing and inspecting Genkit component Your browser does not support the video tag. - - + + In the Developer UI, you can: - Test generation requests with different prompts @@ -514,8 +515,8 @@ The **Developer UI** is a local tool for testing and inspecting Genkit component - Debug any issues with your Genkit integration For more advanced flows and structured output, see [creating flows](/unified-docs/creating-flows). - - + + In the Developer UI: 1. Select the `generate_character` flow from the list of flows @@ -523,8 +524,8 @@ The **Developer UI** is a local tool for testing and inspecting Genkit component 3. Click **Run** You'll see the generated RPG character as structured output, along with execution traces for debugging. - - + + ## Next steps diff --git a/src/content/docs/unified-docs/interrupts.mdx b/src/content/docs/unified-docs/interrupts.mdx index 2b0bff57..0cffe8aa 100644 --- a/src/content/docs/unified-docs/interrupts.mdx +++ b/src/content/docs/unified-docs/interrupts.mdx @@ -3,7 +3,8 @@ title: Pause generation using interrupts description: Learn how to use interrupts in Genkit to pause and resume LLM generation, enabling human-in-the-loop interactions, asynchronous processing, and controlled task completion across JavaScript and Python. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; :::caution[Beta] This feature of Genkit is in **Beta,** which means it is not yet part of Genkit's stable API. APIs of beta features may change in minor version releases. @@ -28,17 +29,17 @@ The most common uses for interrupts fall into a few categories: ## Availability - - + + Interrupts are fully supported in JavaScript with comprehensive APIs for defining, using, and responding to interrupts. - - + + Interrupts are not currently available in Go. Use alternative patterns like conditional tool execution or external coordination mechanisms. - - + + Interrupts are supported in Python with similar functionality to JavaScript, though with some API differences. - - + + ## Before you begin @@ -77,8 +78,8 @@ interacting with an LLM: The most common kind of interrupt allows the LLM to request clarification from the user, for example by asking a multiple-choice question. - - + + Use the Genkit instance's `defineInterrupt()` method: ```ts @@ -104,8 +105,8 @@ the user, for example by asking a multiple-choice question. Note that the `outputSchema` of an interrupt corresponds to the response data you will provide as opposed to something that will be automatically populated by a tool function. - - + + Interrupts are not currently available in Go. Consider using alternative patterns: ```go @@ -123,8 +124,8 @@ the user, for example by asking a multiple-choice question. return executeAction(input) } ``` - - + + Use the Genkit instance's `tool()` decorator: ```python @@ -143,8 +144,8 @@ the user, for example by asking a multiple-choice question. 
Note that the return type annotation of an interrupt corresponds to the response data you will provide as opposed to something that will be automatically populated by a tool function. - - + + ## Use interrupts @@ -152,8 +153,8 @@ Interrupts are passed into the `tools` array when generating content, just like other types of tools. You can pass both normal tools and interrupts to the same `generate` call: - - + + ### Generate ```ts @@ -214,19 +215,19 @@ same `generate` call: const response = await chat.send("make a plan for my birthday party"); ``` - - + + Interrupts are not available in Go. Use alternative patterns like conditional tool execution or external coordination mechanisms. - - + + ```python interrupted_response = await ai.generate( prompt='Ask me a movie trivia question.', tools=['ask_question'], ) ``` - - + + Genkit immediately returns a response on receipt of an interrupt tool call. @@ -235,8 +236,8 @@ Genkit immediately returns a response on receipt of an interrupt tool call. If you've passed one or more interrupts to your generate call, you need to check the response for interrupts so that you can handle them: - - + + ```ts // you can check the 'finishReason' of the response response.finishReason === 'interrupted'; @@ -284,11 +285,11 @@ need to check the response for interrupts so that you can handle them: // no more interrupts, we can see the final response console.log(response.text); ``` - - + + Not applicable - interrupts are not available in Go. - - + + ```python # You can check the 'finish_reason' attribute of the response if interrupted_response.finish_reason == 'interrupted': @@ -315,8 +316,8 @@ need to check the response for interrupts so that you can handle them: tools=['ask_question'], ) ``` - - + + ## Tools with restartable interrupts @@ -324,8 +325,8 @@ Another common pattern for interrupts is the need to _confirm_ an action that the LLM suggests before actually performing it. For example, a payments app might want the user to confirm certain kinds of transfers. - - + + For this use case, you can use the standard `defineTool` method to add custom logic around when to trigger an interrupt, and what to do when an interrupt is _restarted_ with additional metadata. @@ -423,8 +424,8 @@ might want the user to confirm certain kinds of transfers. // no more interrupts, we can see the final response console.log(response.text); ``` - - + + Not applicable - interrupts are not available in Go. Consider implementing confirmation logic within your tools: ```go @@ -442,41 +443,41 @@ might want the user to confirm certain kinds of transfers. return executeTransfer(input) } ``` - - + + Similar patterns are available in Python, though the specific APIs may differ. Consult the Python documentation for the most current implementation details. - - + + ## Best practices ### When to use interrupts - - + + - **User confirmation**: For actions that have significant consequences (payments, deletions, etc.) 
- **Missing information**: When the LLM needs clarification to proceed - **Async operations**: For long-running tasks that need to complete out-of-band - **Safety checks**: To add human oversight to autonomous AI workflows - - + + Since interrupts are not available, consider these alternatives: - **Conditional tools**: Return status codes that indicate when confirmation is needed - **Multi-step flows**: Break complex operations into smaller, confirmable steps - **External coordination**: Use external systems to manage approval workflows - - + + - **User confirmation**: For actions that have significant consequences - **Missing information**: When the LLM needs clarification to proceed - **Async operations**: For long-running tasks that need to complete out-of-band - **Safety checks**: To add human oversight to autonomous AI workflows - - + + ### Error handling - - + + Always handle the case where interrupts might not be responded to: ```ts @@ -503,8 +504,8 @@ might want the user to confirm certain kinds of transfers. } } ``` - - + + Implement proper error handling in your conditional tools: ```go @@ -524,8 +525,8 @@ might want the user to confirm certain kinds of transfers. return result, nil } ``` - - + + Always handle the case where interrupts might not be responded to: ```python @@ -539,8 +540,8 @@ might want the user to confirm certain kinds of transfers. print(f"Error handling interrupt: {e}") # Handle error appropriately ``` - - + + ## Next steps diff --git a/src/content/docs/unified-docs/model-context-protocol.mdx b/src/content/docs/unified-docs/model-context-protocol.mdx index f6d13d31..19cc6c45 100644 --- a/src/content/docs/unified-docs/model-context-protocol.mdx +++ b/src/content/docs/unified-docs/model-context-protocol.mdx @@ -3,7 +3,8 @@ title: Model Context Protocol (MCP) description: Learn how to extend Genkit's capabilities using the Model Context Protocol to connect with external tools, resources, and data sources across JavaScript, Go, and Python. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; The Model Context Protocol (MCP) is an open standard that enables AI applications to securely connect with external tools, resources, and data sources. 
With Genkit's MCP integration, you can: @@ -36,8 +37,8 @@ MCP enables your AI applications to: ## Setting Up MCP with Genkit - - + + Install the MCP plugin: ```bash @@ -79,8 +80,8 @@ MCP enables your AI applications to: }, }); ``` - - + + Import the MCP package: ```bash @@ -137,8 +138,8 @@ MCP enables your AI applications to: } } ``` - - + + Install the MCP plugin: ```bash @@ -180,8 +181,8 @@ MCP enables your AI applications to: if __name__ == "__main__": asyncio.run(main()) ``` - - + + ## Using MCP in AI Workflows @@ -189,8 +190,8 @@ MCP enables your AI applications to: MCP servers provide tools that your AI can use just like any other Genkit tool: - - + + ```ts // Get all available tools from connected MCP servers const mcpTools = await mcpHost.getActiveTools(ai); @@ -211,8 +212,8 @@ MCP servers provide tools that your AI can use just like any other Genkit tool: tools: [...fsTools, ...memoryTools], }); ``` - - + + ```go // Get all tools from all connected servers tools, err := manager.GetActiveTools(ctx, g) @@ -236,8 +237,8 @@ MCP servers provide tools that your AI can use just like any other Genkit tool: log.Fatal(err) } ``` - - + + ```python # Get all available tools tools = await mcp_plugin.get_active_tools() @@ -258,15 +259,15 @@ MCP servers provide tools that your AI can use just like any other Genkit tool: tools=fs_tools + memory_tools, ) ``` - - + + ### Working with Resources MCP resources provide contextual information that can enhance your AI's understanding: - - + + ```ts // Get resources from MCP servers const resources = await mcpHost.getActiveResources(ai); @@ -280,8 +281,8 @@ MCP resources provide contextual information that can enhance your AI's understa // Access specific resources const systemInfo = await mcpHost.getResource('system', 'system://info'); ``` - - + + ```go // Get resources from MCP servers resources, err := manager.GetActiveResources(ctx, g) @@ -295,8 +296,8 @@ MCP resources provide contextual information that can enhance your AI's understa ai.WithResources(resources...), ) ``` - - + + ```python # Get resources from MCP servers resources = await mcp_plugin.get_active_resources() @@ -307,15 +308,15 @@ MCP resources provide contextual information that can enhance your AI's understa resources=resources, ) ``` - - + + ### Using MCP Prompts MCP servers can provide reusable prompt templates: - - + + ```ts // Get a prompt from an MCP server const analysisPrompt = await mcpHost.getPrompt('memory', 'analyze_data'); @@ -326,8 +327,8 @@ MCP servers can provide reusable prompt templates: focus: 'user behavior patterns' }); ``` - - + + ```go // Get prompt from specific server prompt, err := manager.GetPrompt(ctx, g, "memory", "analyze_data", map[string]any{ @@ -338,8 +339,8 @@ MCP servers can provide reusable prompt templates: log.Fatal(err) } ``` - - + + ```python # Get prompt from specific server prompt = await mcp_plugin.get_prompt("memory", "analyze_data") @@ -350,15 +351,15 @@ MCP servers can provide reusable prompt templates: "focus": "user behavior patterns" }) ``` - - + + ## Building Flows with MCP ### Example: Document Analysis Workflow - - + + ```ts const documentAnalysisFlow = ai.defineFlow( { @@ -404,8 +405,8 @@ MCP servers can provide reusable prompt templates: } ); ``` - - + + ```go type DocumentAnalysisInput struct { Directory string `json:"directory"` @@ -443,8 +444,8 @@ MCP servers can provide reusable prompt templates: }, nil }) ``` - - + + ```python @ai.flow() async def document_analysis_flow(directory: str, analysis_type: str): @@ -476,15 +477,15 @@ 
MCP servers can provide reusable prompt templates: "recommendations": [] # Parse from insights } ``` - - + + ## Advanced MCP Patterns ### Dynamic Server Management - - + + ```ts // Connect to servers dynamically based on user needs const connectWeatherServer = async (apiKey: string) => { @@ -503,8 +504,8 @@ MCP servers can provide reusable prompt templates: const activeServers = await mcpHost.getActiveServers(); console.log('Connected servers:', activeServers); ``` - - + + ```go // Connect to server dynamically err = manager.Connect("weather", mcp.MCPClientOptions{ @@ -526,8 +527,8 @@ MCP servers can provide reusable prompt templates: log.Fatal(err) } ``` - - + + ```python # Connect to server dynamically await mcp_plugin.connect_server("weather", { @@ -544,13 +545,13 @@ MCP servers can provide reusable prompt templates: active_servers = await mcp_plugin.get_active_servers() print(f"Active servers: {active_servers}") ``` - - + + ### Error Handling and Resilience - - + + ```ts const robustMcpFlow = ai.defineFlow( { @@ -593,8 +594,8 @@ MCP servers can provide reusable prompt templates: } ); ``` - - + + ```go robustMcpFlow := genkit.DefineFlow(g, "robustMcpFlow", func(ctx context.Context, input struct{ Task string }) (string, error) { @@ -631,8 +632,8 @@ MCP servers can provide reusable prompt templates: return resp.Text(), nil }) ``` - - + + ```python @ai.flow() async def robust_mcp_flow(task: str) -> str: @@ -667,8 +668,8 @@ MCP servers can provide reusable prompt templates: # Clean up connections await mcp_plugin.close() ``` - - + + ## Best Practices diff --git a/src/content/docs/unified-docs/multi-agent-systems.mdx b/src/content/docs/unified-docs/multi-agent-systems.mdx index 52d2c22e..c8a71e6c 100644 --- a/src/content/docs/unified-docs/multi-agent-systems.mdx +++ b/src/content/docs/unified-docs/multi-agent-systems.mdx @@ -3,7 +3,8 @@ title: Building multi-agent systems description: Learn how to build multi-agent systems in Genkit by delegating tasks to specialized agents, addressing challenges of complex agentic workflows across different languages. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; :::caution[Beta] This feature of Genkit is in **Beta,** which means it is not yet part of Genkit's stable API. APIs of beta features may change in minor version releases. @@ -19,8 +20,8 @@ Multi-agent systems take this concept further by using specialized agents that c ## Availability and Approach - - + + JavaScript provides built-in multi-agent system support through Genkit's prompt-as-tool architecture. You can define specialized agents as prompts and use them as tools in other agents, creating hierarchical delegation patterns. Features include: @@ -29,22 +30,22 @@ Multi-agent systems take this concept further by using specialized agents that c - Specialized agent contexts and capabilities - Integration with chat sessions and flows - Built-in orchestration patterns - - + + Go doesn't have built-in multi-agent system APIs. You need to implement agent coordination manually by: - Creating separate functions or flows for each agent - Implementing your own delegation logic - Managing agent state and context manually - Building coordination patterns using flows and tools - - + + Python doesn't have built-in multi-agent system APIs. 
You need to implement agent coordination manually by: - Creating separate functions or flows for each agent - Implementing your own delegation logic - Managing agent state and context manually - Building coordination patterns using flows and tools - - + + ## Why use multi-agent systems? @@ -60,8 +61,8 @@ Multi-agent systems address these issues by creating specialized agents that can Let's start with a simple customer service agent to understand the progression to multi-agent systems: - - + + Here are some excerpts from a very simple customer service agent built using a single prompt and several tools: ```typescript @@ -110,8 +111,8 @@ Let's start with a simple customer service agent to understand the progression t tools: [menuLookupTool, reservationTool], }); ``` - - + + In Go, you would implement a single agent using flows and tools: ```go @@ -152,8 +153,8 @@ Let's start with a simple customer service agent to understand the progression t return resp.Text(), nil } ``` - - + + In Python, you would implement a single agent using flows and tools: ```python @@ -185,13 +186,13 @@ Let's start with a simple customer service agent to understand the progression t return response.text ``` - - + + ## Multi-agent architecture - - + + One approach you can use to deal with the issues that arise when building complex agents is to create many specialized agents and use a general purpose agent to delegate tasks to them. Genkit supports this architecture by allowing @@ -230,8 +231,8 @@ Let's start with a simple customer service agent to understand the progression t // Start a chat session, initially with the triage agent const chat = ai.chat(triageAgent); ``` - - + + In Go, you can implement multi-agent patterns by creating separate flows for each agent and implementing delegation logic: ```go @@ -328,8 +329,8 @@ Let's start with a simple customer service agent to understand the progression t } } ``` - - + + In Python, you can implement multi-agent patterns by creating separate flows for each agent: ```python @@ -404,13 +405,13 @@ Let's start with a simple customer service agent to understand the progression t # Handle general queries directly return await general_greeting(user_input, ctx) ``` - - + + ## Agent coordination patterns - - + + ### Hierarchical delegation The most common pattern is hierarchical delegation, where a triage agent routes requests to specialized agents: @@ -478,8 +479,8 @@ Let's start with a simple customer service agent to understand the progression t return fulfillment.trackingNumber; }); ``` - - + + ### Hierarchical delegation Implement delegation through function routing: @@ -546,8 +547,8 @@ Let's start with a simple customer service agent to understand the progression t return fulfillment.TrackingNumber, nil } ``` - - + + ### Hierarchical delegation Implement delegation through flow routing: @@ -606,88 +607,88 @@ Let's start with a simple customer service agent to understand the progression t return fulfillment.get('tracking_number') ``` - - + + ## Best practices ### Agent design principles - - + + - **Single responsibility**: Each agent should have a clear, focused purpose - **Clear interfaces**: Define clear input/output schemas for agent communication - **Graceful delegation**: Always explain to users when transferring between agents - **Error handling**: Implement fallback strategies when specialist agents fail - **Context preservation**: Maintain conversation context across agent transfers - - + + - **Modular design**: Keep agent functions focused and composable - **Error 
propagation**: Handle errors gracefully and provide meaningful feedback - **State management**: Carefully manage state between agent calls - **Resource efficiency**: Avoid unnecessary agent calls through smart routing - **Testing**: Test individual agents and coordination logic separately - - + + - **Flow composition**: Use flows to create reusable agent patterns - **Type safety**: Use proper type hints for agent inputs and outputs - **Async patterns**: Leverage async/await for efficient agent coordination - **Error handling**: Implement comprehensive error handling and recovery - **Monitoring**: Add logging and metrics to track agent performance - - + + ### Performance considerations - - + + - **Minimize agent hops**: Avoid unnecessary delegation chains - **Cache agent responses**: Cache responses for repeated queries - **Parallel execution**: Use Promise.all() for independent agent calls - **Context size**: Keep agent contexts focused to reduce token usage - **Tool selection**: Provide only relevant tools to each agent - - + + - **Concurrent execution**: Use goroutines for parallel agent processing - **Connection pooling**: Reuse connections for agent communications - **Memory management**: Be mindful of memory usage in long-running agent systems - **Timeout handling**: Implement timeouts for agent calls - **Resource limits**: Set appropriate limits on agent execution - - + + - **Async coordination**: Use asyncio for efficient agent orchestration - **Resource pooling**: Pool expensive resources across agents - **Memory optimization**: Monitor memory usage in complex agent workflows - **Caching strategies**: Implement intelligent caching for agent responses - **Load balancing**: Distribute agent workloads appropriately - - + + ### Security and safety - - + + - **Agent isolation**: Ensure agents can only access their designated tools - **Input validation**: Validate all inputs before passing between agents - **Permission boundaries**: Define clear permission boundaries for each agent - **Audit trails**: Log agent interactions for debugging and compliance - **Rate limiting**: Implement rate limiting to prevent agent abuse - - + + - **Access control**: Implement proper access controls for agent functions - **Input sanitization**: Sanitize all inputs to prevent injection attacks - **Resource limits**: Set limits on agent resource consumption - **Logging**: Implement comprehensive logging for agent activities - **Validation**: Validate agent outputs before using them - - + + - **Flow security**: Secure flow execution and data passing - **Input validation**: Validate all agent inputs and outputs - **Access patterns**: Control access to sensitive operations - **Monitoring**: Monitor agent behavior for anomalies - **Sandboxing**: Consider sandboxing for untrusted agent code - - + + ## Next steps diff --git a/src/content/docs/unified-docs/observability-monitoring.mdx b/src/content/docs/unified-docs/observability-monitoring.mdx index 880e8373..a9579a0c 100644 --- a/src/content/docs/unified-docs/observability-monitoring.mdx +++ b/src/content/docs/unified-docs/observability-monitoring.mdx @@ -3,7 +3,8 @@ title: Observability and Monitoring description: Learn how to monitor and observe your Genkit AI workflows across JavaScript, Go, and Python, including local development tools, production monitoring, and OpenTelemetry integration. 
--- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; Genkit provides comprehensive observability features to help you understand, debug, and optimize your AI workflows. Whether you're developing locally or running in production, Genkit offers the tools you need to monitor performance, trace execution, and troubleshoot issues. @@ -23,8 +24,8 @@ Genkit's observability stack includes: During development, Genkit automatically collects traces and provides detailed debugging capabilities through the Developer UI: - - + + The Developer UI is automatically available when you run: ```bash @@ -37,8 +38,8 @@ During development, Genkit automatically collects traces and provides detailed d - **Performance metrics**: View latency and execution statistics - **Error debugging**: Detailed error information and stack traces - **Flow testing**: Run and test flows directly from the UI - - + + The trace store feature is enabled automatically in development environments: ```bash @@ -52,8 +53,8 @@ During development, Genkit automatically collects traces and provides detailed d - **Input/output inspection**: Debug data transformations - **Performance analysis**: Identify bottlenecks and optimization opportunities - **Interactive testing**: Test flows with different inputs - - + + Development observability is built into the Genkit runtime: ```bash @@ -66,15 +67,15 @@ During development, Genkit automatically collects traces and provides detailed d - **Real-time debugging**: Inspect flows as they execute - **Data flow visualization**: See how data moves through your workflow - **Error analysis**: Detailed error reporting and debugging - - + + ### Local Logging Genkit provides a centralized logging system that integrates with the observability stack: - - + + ```ts import { logger } from 'genkit/logging'; @@ -98,8 +99,8 @@ Genkit provides a centralized logging system that integrates with the observabil } ); ``` - - + + ```go import ( "context" @@ -120,8 +121,8 @@ Genkit provides a centralized logging system that integrates with the observabil return result, nil } ``` - - + + ```python import logging from genkit.ai import Genkit @@ -144,8 +145,8 @@ Genkit provides a centralized logging system that integrates with the observabil logger.error(f"Flow failed: {error}") raise ``` - - + + ## Production Monitoring @@ -155,8 +156,8 @@ For production deployments, Genkit integrates with Firebase to provide comprehen #### Setup and Configuration - - + + **1. Install the Firebase plugin:** ```bash @@ -186,8 +187,8 @@ For production deployments, Genkit integrates with Firebase to provide comprehen disableTraces: false, }); ``` - - + + **1. Install the Google Cloud plugin:** ```bash @@ -214,8 +215,8 @@ For production deployments, Genkit integrates with Firebase to provide comprehen // Telemetry is automatically configured } ``` - - + + **1. 
Install monitoring dependencies:** ```bash @@ -233,8 +234,8 @@ For production deployments, Genkit integrates with Firebase to provide comprehen ai = Genkit() ``` - - + + #### Required Google Cloud APIs @@ -268,8 +269,8 @@ Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/), al ### Custom OpenTelemetry Configuration - - + + ```ts import { NodeSDK } from '@opentelemetry/sdk-node'; import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node'; @@ -289,8 +290,8 @@ Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/), al import { genkit } from 'genkit'; const ai = genkit({ /* config */ }); ``` - - + + ```go import ( "go.opentelemetry.io/otel" @@ -315,8 +316,8 @@ Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/), al otel.SetTracerProvider(tp) } ``` - - + + ```python from opentelemetry import trace from opentelemetry.exporter.jaeger.thrift import JaegerExporter @@ -335,8 +336,8 @@ Genkit is fully instrumented with [OpenTelemetry](https://opentelemetry.io/), al span_processor = BatchSpanProcessor(jaeger_exporter) trace.get_tracer_provider().add_span_processor(span_processor) ``` - - + + ### Popular Observability Platforms @@ -356,8 +357,8 @@ Genkit's OpenTelemetry integration works with: Control telemetry collection to balance observability with performance: - - + + ```ts import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; import { TraceIdRatioBasedSampler } from '@opentelemetry/sdk-trace-base'; @@ -379,8 +380,8 @@ Control telemetry collection to balance observability with performance: }, }); ``` - - + + ```go import ( "go.opentelemetry.io/otel/sdk/trace" @@ -398,8 +399,8 @@ Control telemetry collection to balance observability with performance: otel.SetTracerProvider(tp) } ``` - - + + ```python from opentelemetry.sdk.trace.sampling import TraceIdRatioBasedSampler @@ -409,15 +410,15 @@ Control telemetry collection to balance observability with performance: trace_provider = TracerProvider(sampler=sampler) trace.set_tracer_provider(trace_provider) ``` - - + + ### Custom Metrics Add application-specific metrics to your observability stack: - - + + ```ts import { metrics } from '@opentelemetry/api'; @@ -448,8 +449,8 @@ Add application-specific metrics to your observability stack: } ); ``` - - + + ```go import ( "go.opentelemetry.io/otel/metric" @@ -467,8 +468,8 @@ Add application-specific metrics to your observability stack: )) } ``` - - + + ```python from opentelemetry import metrics @@ -495,8 +496,8 @@ Add application-specific metrics to your observability stack: ) raise ``` - - + + ## Troubleshooting diff --git a/src/content/docs/unified-docs/observability/advanced-configuration.mdx b/src/content/docs/unified-docs/observability/advanced-configuration.mdx index a8df3baf..5409d359 100644 --- a/src/content/docs/unified-docs/observability/advanced-configuration.mdx +++ b/src/content/docs/unified-docs/observability/advanced-configuration.mdx @@ -3,7 +3,8 @@ title: Advanced Configuration description: Learn advanced configuration options for Genkit observability, including sampling, performance tuning, custom metrics, and telemetry optimization across JavaScript, Go, and Python. 
--- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; This guide covers advanced configuration options for fine-tuning Genkit's observability features to optimize performance, control costs, and customize telemetry collection. @@ -20,8 +21,8 @@ Genkit's observability system can be configured at multiple levels: Understanding the default settings helps you make informed configuration decisions: - - + + ```ts // Default Firebase telemetry configuration { @@ -38,8 +39,8 @@ Understanding the default settings helps you make informed configuration decisio sampler: AlwaysOnSampler(), // 100% sampling } ``` - - + + ```go // Default configuration is handled by the Google Cloud plugin // Telemetry is automatically configured with sensible defaults @@ -52,8 +53,8 @@ Understanding the default settings helps you make informed configuration decisio ExportInterval time.Duration // 5 minutes } ``` - - + + ```python # Default monitoring configuration default_config = { @@ -65,8 +66,8 @@ Understanding the default settings helps you make informed configuration decisio 'disable_input_output_logging': False, } ``` - - + + ## Sampling Configuration @@ -74,8 +75,8 @@ Sampling reduces telemetry volume and costs while maintaining observability: ### Trace Sampling - - + + ```ts import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; import { @@ -103,8 +104,8 @@ Sampling reduces telemetry volume and costs while maintaining observability: sampler: new TraceIdRatioBasedSampler(samplingRate), }); ``` - - + + ```go import ( "go.opentelemetry.io/otel/sdk/trace" @@ -132,8 +133,8 @@ Sampling reduces telemetry volume and costs while maintaining observability: otel.SetTracerProvider(tp) } ``` - - + + ```python from opentelemetry.sdk.trace.sampling import ( TraceIdRatioBasedSampler, @@ -158,15 +159,15 @@ Sampling reduces telemetry volume and costs while maintaining observability: sampling_rate=sampling_rate ) ``` - - + + ### Custom Sampling Strategies Implement custom sampling logic based on your application needs: - - + + ```ts import { Sampler, SamplingResult, SamplingDecision } from '@opentelemetry/sdk-trace-base'; @@ -199,8 +200,8 @@ Implement custom sampling logic based on your application needs: sampler: new CustomSampler(), }); ``` - - + + ```go import ( "go.opentelemetry.io/otel/sdk/trace" @@ -240,8 +241,8 @@ Implement custom sampling logic based on your application needs: return "CustomSampler" } ``` - - + + ```python from opentelemetry.sdk.trace.sampling import Sampler, SamplingResult from opentelemetry.trace import SpanKind @@ -267,8 +268,8 @@ Implement custom sampling logic based on your application needs: custom_sampler=CustomSampler() ) ``` - - + + ## Performance Optimization @@ -276,8 +277,8 @@ Implement custom sampling logic based on your application needs: Optimize telemetry export for your application's performance requirements: - - + + ```ts enableFirebaseTelemetry({ // Reduce export frequency for high-volume applications @@ -299,8 +300,8 @@ Optimize telemetry export for your application's performance requirements: maxQueueSize: 2048, // Default: 2048 }); ``` - - + + ```go import ( "go.opentelemetry.io/otel/sdk/trace" @@ -324,8 +325,8 @@ Optimize telemetry export for your application's performance requirements: otel.SetTracerProvider(tp) } ``` - - + + ```python from opentelemetry.sdk.trace.export import BatchSpanProcessor @@ -343,15 +344,15 @@ Optimize telemetry export for 
your application's performance requirements: batch_processor=batch_processor ) ``` - - + + ### Auto-Instrumentation Control Fine-tune automatic instrumentation to reduce overhead: - - + + ```ts enableFirebaseTelemetry({ autoInstrumentationConfig: { @@ -388,8 +389,8 @@ Fine-tune automatic instrumentation to reduce overhead: }, }); ``` - - + + ```go // Go auto-instrumentation is typically handled through // manual instrumentation or specific library integrations @@ -421,8 +422,8 @@ Fine-tune automatic instrumentation to reduce overhead: ) } ``` - - + + ```python from opentelemetry.instrumentation.requests import RequestsInstrumentor from opentelemetry.instrumentation.flask import FlaskInstrumentor @@ -446,8 +447,8 @@ Fine-tune automatic instrumentation to reduce overhead: } ) ``` - - + + ## Data Privacy and Security @@ -455,8 +456,8 @@ Fine-tune automatic instrumentation to reduce overhead: Protect sensitive data by controlling what gets logged: - - + + ```ts enableFirebaseTelemetry({ // Disable all input/output logging @@ -489,8 +490,8 @@ Protect sensitive data by controlling what gets logged: } ); ``` - - + + ```go import ( "context" @@ -525,8 +526,8 @@ Protect sensitive data by controlling what gets logged: return result, nil } ``` - - + + ```python import logging from typing import Any, Dict @@ -556,15 +557,15 @@ Protect sensitive data by controlling what gets logged: return result ``` - - + + ### Attribute Filtering Filter sensitive attributes from telemetry data: - - + + ```ts import { SpanProcessor, Span } from '@opentelemetry/sdk-trace-base'; @@ -591,8 +592,8 @@ Filter sensitive attributes from telemetry data: spanProcessors: [new AttributeFilterProcessor()], }); ``` - - + + ```go import ( "go.opentelemetry.io/otel/sdk/trace" @@ -628,8 +629,8 @@ Filter sensitive attributes from telemetry data: func (p *AttributeFilterProcessor) Shutdown(ctx context.Context) error { return nil } func (p *AttributeFilterProcessor) ForceFlush(ctx context.Context) error { return nil } ``` - - + + ```python from opentelemetry.sdk.trace import SpanProcessor from opentelemetry.trace import Span @@ -657,15 +658,15 @@ Filter sensitive attributes from telemetry data: # Add to trace provider trace_provider.add_span_processor(AttributeFilterProcessor()) ``` - - + + ## Custom Metrics Add application-specific metrics to enhance observability: - - + + ```ts import { metrics } from '@opentelemetry/api'; @@ -739,8 +740,8 @@ Add application-specific metrics to enhance observability: } ); ``` - - + + ```go import ( "go.opentelemetry.io/otel/metric" @@ -786,8 +787,8 @@ Add application-specific metrics to enhance observability: }) } ``` - - + + ```python from opentelemetry import metrics import time @@ -853,15 +854,15 @@ Add application-specific metrics to enhance observability: finally: active_flows.add(-1) ``` - - + + ## Environment-Specific Configuration Configure observability differently for each environment: - - + + ```ts const environment = process.env.NODE_ENV || 'development'; @@ -898,8 +899,8 @@ Configure observability differently for each environment: enableFirebaseTelemetry(getObservabilityConfig()); ``` - - + + ```go func getObservabilityConfig() TelemetryConfig { env := os.Getenv("ENVIRONMENT") @@ -927,8 +928,8 @@ Configure observability differently for each environment: } } ``` - - + + ```python import os @@ -957,8 +958,8 @@ Configure observability differently for each environment: enable_firebase_monitoring(**get_observability_config()) ``` - - + + ## Resource Management @@ -966,8 +967,8 @@ Configure 
observability differently for each environment: Monitor and optimize resource usage: - - + + ```ts // Monitor memory usage const memoryUsage = meter.createHistogram('genkit_memory_usage_mb', { @@ -999,8 +1000,8 @@ Monitor and optimize resource usage: }, }); ``` - - + + ```go import ( "runtime" @@ -1031,8 +1032,8 @@ Monitor and optimize resource usage: } } ``` - - + + ```python import psutil import threading @@ -1065,8 +1066,8 @@ Monitor and optimize resource usage: export_interval=60, # Longer intervals ) ``` - - + + ## Best Practices Summary diff --git a/src/content/docs/unified-docs/observability/authentication.mdx b/src/content/docs/unified-docs/observability/authentication.mdx index 0434183b..5cb802c5 100644 --- a/src/content/docs/unified-docs/observability/authentication.mdx +++ b/src/content/docs/unified-docs/observability/authentication.mdx @@ -1,9 +1,12 @@ --- +# FLAG: This needs more review title: Authentication and Setup description: Learn how to set up authentication and permissions for Genkit observability and monitoring across JavaScript, Go, and Python environments. --- import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; This guide covers the authentication and setup requirements for enabling Genkit observability and monitoring in production environments. @@ -296,8 +299,8 @@ gcloud auth application-default login \ Verify your authentication setup: - - + + ```ts import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; @@ -316,8 +319,8 @@ Verify your authentication setup: } ); ``` - - + + ```go func testAuthentication() { ctx := context.Background() @@ -344,8 +347,8 @@ Verify your authentication setup: log.Printf("Test result: %s", result) } ``` - - + + ```python from genkit.ai import Genkit from genkit.monitoring import enable_firebase_monitoring @@ -366,8 +369,8 @@ Verify your authentication setup: except Exception as e: print(f"Authentication failed: {e}") ``` - - + + ## Security Best Practices diff --git a/src/content/docs/unified-docs/observability/overview.mdx b/src/content/docs/unified-docs/observability/overview.mdx index df8e4256..8dc1c7e6 100644 --- a/src/content/docs/unified-docs/observability/overview.mdx +++ b/src/content/docs/unified-docs/observability/overview.mdx @@ -3,7 +3,8 @@ title: Observability Overview description: Get started with Genkit's observability features, including local development tools, production monitoring, and telemetry configuration across JavaScript, Go, and Python. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; This guide provides an overview of Genkit's observability capabilities and helps you get started with monitoring your AI workflows in both development and production environments. 
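A quick way to see what the trace viewer captures: split a flow into named steps with `ai.run()`, and each step gets its own span with timing and input/output payloads. The sketch below assumes the JavaScript SDK; the `fetch-context` step is a stand-in for real work, not an actual retriever:

```ts
import { genkit, z } from 'genkit';
import { googleAI } from '@genkit-ai/googleai';

const ai = genkit({ plugins: [googleAI()] });

export const answerFlow = ai.defineFlow(
  {
    name: 'answerFlow',
    inputSchema: z.object({ question: z.string() }),
    outputSchema: z.object({ answer: z.string() }),
  },
  async ({ question }) => {
    // Each ai.run() call appears as its own step in the trace view.
    const context = await ai.run('fetch-context', async () => {
      return `Background notes for: ${question}`; // stand-in for a lookup
    });

    const { text } = await ai.generate({
      model: googleAI.model('gemini-2.5-flash'),
      prompt: `${context}\n\nAnswer this question: ${question}`,
    });
    return { answer: text };
  },
);
```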
@@ -42,8 +43,8 @@ For deployed applications, Genkit offers production-grade monitoring: Genkit automatically instruments your code to collect telemetry data: - - + + ```ts import { genkit } from 'genkit'; import { googleAI } from '@genkit-ai/googleai'; @@ -65,8 +66,8 @@ Genkit automatically instruments your code to collect telemetry data: } ); ``` - - + + ```go import ( "context" @@ -90,8 +91,8 @@ Genkit automatically instruments your code to collect telemetry data: }) } ``` - - + + ```python from genkit.ai import Genkit @@ -107,8 +108,8 @@ Genkit automatically instruments your code to collect telemetry data: ) return result.text ``` - - + + ### Developer UI Integration @@ -150,8 +151,8 @@ python -m genkit start Enable production monitoring by configuring telemetry export: - - + + ```bash # Install Firebase plugin npm install @genkit-ai/firebase @@ -159,20 +160,20 @@ Enable production monitoring by configuring telemetry export: # Enable monitoring export ENABLE_FIREBASE_MONITORING=true ``` - - + + ```bash # Install Google Cloud plugin go get github.com/firebase/genkit/go/plugins/googlecloud ``` - - + + ```bash # Install monitoring dependencies pip install genkit[monitoring] ``` - - + + ### 3. Configure Permissions diff --git a/src/content/docs/unified-docs/observability/troubleshooting.mdx b/src/content/docs/unified-docs/observability/troubleshooting.mdx index e6d0de80..94e496dd 100644 --- a/src/content/docs/unified-docs/observability/troubleshooting.mdx +++ b/src/content/docs/unified-docs/observability/troubleshooting.mdx @@ -3,7 +3,8 @@ title: Troubleshooting description: Common issues and solutions for Genkit observability and monitoring across JavaScript, Go, and Python environments. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; This guide helps you diagnose and resolve common issues with Genkit's observability and monitoring features. @@ -18,8 +19,8 @@ This guide helps you diagnose and resolve common issues with Genkit's observabil **Possible Causes and Solutions:** - - + + **1. Check Firebase plugin configuration:** ```ts import { enableFirebaseTelemetry } from '@genkit-ai/firebase'; @@ -44,8 +45,8 @@ This guide helps you diagnose and resolve common issues with Genkit's observabil gcloud logging logs list --limit=1 gcloud monitoring metrics list --limit=1 ``` - - + + **1. Verify Google Cloud plugin setup:** ```go import ( @@ -82,8 +83,8 @@ This guide helps you diagnose and resolve common issues with Genkit's observabil defer client.Close() log.Println("Monitoring client created successfully") ``` - - + + **1. Verify monitoring setup:** ```python from genkit.ai import Genkit @@ -115,8 +116,8 @@ This guide helps you diagnose and resolve common issues with Genkit's observabil except Exception as e: print(f"Authentication failed: {e}") ``` - - + + ### Authentication and Permission Issues @@ -161,8 +162,8 @@ gcloud services list --enabled --filter="name:(logging|monitoring|cloudtrace)" **Solutions:** - - + + **1. Implement sampling:** ```ts import { TraceIdRatioBasedSampler } from '@opentelemetry/sdk-trace-base'; @@ -189,8 +190,8 @@ gcloud services list --enabled --filter="name:(logging|monitoring|cloudtrace)" }, }); ``` - - + + **1. 
Configure sampling:** ```go import "go.opentelemetry.io/otel/sdk/trace" @@ -213,8 +214,8 @@ gcloud services list --enabled --filter="name:(logging|monitoring|cloudtrace)" trace.WithMaxExportBatchSize(1024), // Larger batches ) ``` - - + + **1. Reduce sampling rate:** ```python enable_firebase_monitoring( @@ -234,8 +235,8 @@ gcloud services list --enabled --filter="name:(logging|monitoring|cloudtrace)" schedule_delay_millis=60000, # 1 minute ) ``` - - + + ### Missing Traces in Production @@ -260,8 +261,8 @@ curl -I https://monitoring.googleapis.com/ **3. Check export configuration:** - - + + ```ts // Add debugging to trace export enableFirebaseTelemetry({ @@ -275,8 +276,8 @@ curl -I https://monitoring.googleapis.com/ forceDevExport: true, }); ``` - - + + ```go // Add logging to trace export import "go.opentelemetry.io/otel/exporters/stdout/stdouttrace" @@ -293,8 +294,8 @@ curl -I https://monitoring.googleapis.com/ trace.WithSpanProcessor(trace.NewSimpleSpanProcessor(stdoutExporter)), ) ``` - - + + ```python # Enable debug logging import logging @@ -309,8 +310,8 @@ curl -I https://monitoring.googleapis.com/ SimpleSpanProcessor(console_exporter) ) ``` - - + + ### Performance Issues @@ -323,8 +324,8 @@ curl -I https://monitoring.googleapis.com/ **1. Optimize telemetry overhead:** - - + + ```ts // Reduce telemetry overhead enableFirebaseTelemetry({ @@ -342,8 +343,8 @@ curl -I https://monitoring.googleapis.com/ }, }); ``` - - + + ```go // Optimize span processing batchProcessor := trace.NewBatchSpanProcessor( @@ -358,8 +359,8 @@ curl -I https://monitoring.googleapis.com/ trace.WithSampler(trace.TraceIDRatioBased(0.1)), ) ``` - - + + ```python # Optimize performance enable_firebase_monitoring( @@ -369,13 +370,13 @@ curl -I https://monitoring.googleapis.com/ max_queue_size=2048, ) ``` - - + + **2. Monitor resource usage:** - - + + ```ts // Monitor memory usage setInterval(() => { @@ -387,8 +388,8 @@ curl -I https://monitoring.googleapis.com/ }); }, 30000); ``` - - + + ```go // Monitor memory usage import "runtime" @@ -409,8 +410,8 @@ curl -I https://monitoring.googleapis.com/ } }() ``` - - + + ```python import psutil import threading @@ -428,15 +429,15 @@ curl -I https://monitoring.googleapis.com/ monitor_thread = threading.Thread(target=monitor_memory, daemon=True) monitor_thread.start() ``` - - + + ## Debugging Tools ### Enable Debug Logging - - + + ```ts // Enable OpenTelemetry debug logging process.env.OTEL_LOG_LEVEL = 'debug'; @@ -451,8 +452,8 @@ curl -I https://monitoring.googleapis.com/ // ... 
other config }); ``` - - + + ```go import ( "log/slog" @@ -468,8 +469,8 @@ curl -I https://monitoring.googleapis.com/ // Log telemetry events slog.Debug("Telemetry configuration", "config", config) ``` - - + + ```python import logging import os @@ -487,13 +488,13 @@ curl -I https://monitoring.googleapis.com/ logger = logging.getLogger(__name__) logger.debug("Enabling Firebase monitoring") ``` - - + + ### Test Telemetry Export - - + + ```ts // Test telemetry export with console output import { ConsoleSpanExporter } from '@opentelemetry/exporter-console'; @@ -507,8 +508,8 @@ curl -I https://monitoring.googleapis.com/ spanProcessors: [processor], }); ``` - - + + ```go // Test with stdout exporter import "go.opentelemetry.io/otel/exporters/stdout/stdouttrace" @@ -524,8 +525,8 @@ curl -I https://monitoring.googleapis.com/ trace.WithSpanProcessor(trace.NewSimpleSpanProcessor(stdoutExporter)), ) ``` - - + + ```python # Test with console exporter from opentelemetry.exporter.console import ConsoleSpanExporter @@ -536,13 +537,13 @@ curl -I https://monitoring.googleapis.com/ SimpleSpanProcessor(console_exporter) ) ``` - - + + ### Validate Configuration - - + + ```ts // Configuration validation function function validateTelemetryConfig() { @@ -562,8 +563,8 @@ curl -I https://monitoring.googleapis.com/ validateTelemetryConfig(); ``` - - + + ```go func validateTelemetryConfig() { log.Println("Environment variables:") @@ -580,8 +581,8 @@ curl -I https://monitoring.googleapis.com/ } } ``` - - + + ```python def validate_telemetry_config(): import os @@ -600,8 +601,8 @@ curl -I https://monitoring.googleapis.com/ validate_telemetry_config() ``` - - + + ## Monitoring Health @@ -620,8 +621,8 @@ gcloud trace list-traces --limit=10 ### Monitor Export Success - - + + ```ts // Monitor export success/failure import { metrics } from '@opentelemetry/api'; @@ -649,8 +650,8 @@ gcloud trace list-traces --limit=10 } } ``` - - + + ```go // Monitor export health type MonitoredExporter struct { @@ -670,8 +671,8 @@ gcloud trace list-traces --limit=10 return err } ``` - - + + ```python # Monitor export health from opentelemetry.sdk.trace.export import SpanExporter, SpanExportResult @@ -696,8 +697,8 @@ gcloud trace list-traces --limit=10 print(f"Export error: {e}") return SpanExportResult.FAILURE ``` - - + + ## Getting Help diff --git a/src/content/docs/unified-docs/plugin-authoring/models.mdx b/src/content/docs/unified-docs/plugin-authoring/models.mdx index c4b3e6d8..fd0c0468 100644 --- a/src/content/docs/unified-docs/plugin-authoring/models.mdx +++ b/src/content/docs/unified-docs/plugin-authoring/models.mdx @@ -3,7 +3,8 @@ title: Writing Model Plugins description: Learn how to create Genkit model plugins across JavaScript, Go, and Python to integrate new generative AI models with comprehensive examples and best practices. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; Model plugins add generative AI models to the Genkit registry. A model represents any generative model capable of receiving a prompt as input and generating text, media, or data as output. This guide covers creating model plugins across all supported languages. 
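Before diving into the full implementations, it helps to see the smallest possible shape. The sketch below compresses a JavaScript model plugin to its skeleton; `callMyApi` is a hypothetical provider client, and the response shape follows recent Genkit releases, so adjust it to the version you target:

```ts
import { Genkit } from 'genkit';
import { genkitPlugin } from 'genkit/plugin';

// Hypothetical provider client; replace with your API's SDK.
async function callMyApi(messages: unknown): Promise<{ text: string }> {
  return { text: 'hello from my model' };
}

export function myModelPlugin() {
  return genkitPlugin('myModel', async (ai: Genkit) => {
    ai.defineModel(
      {
        name: 'myModel/basic-model',
        supports: { multiturn: true, tools: false, media: false },
      },
      async (request) => {
        // Map Genkit's request to the provider and translate the
        // result back into Genkit's response format.
        const apiResponse = await callMyApi(request.messages);
        return {
          message: {
            role: 'model' as const,
            content: [{ text: apiResponse.text }],
          },
          finishReason: 'stop' as const,
        };
      },
    );
  });
}
```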
@@ -17,8 +18,8 @@ A model plugin consists of three main components: ## Basic Model Plugin - - + + ```ts import { Genkit, z } from 'genkit'; import { GenkitPlugin, genkitPlugin } from 'genkit/plugin'; @@ -109,8 +110,8 @@ A model plugin consists of three main components: }; } ``` - - + + ```go package mymodelplugin @@ -326,8 +327,8 @@ A model plugin consists of three main components: return ai.NewModelRef(fmt.Sprintf("%s/%s", ProviderID, name), config) } ``` - - + + ```python import os import asyncio @@ -510,15 +511,15 @@ A model plugin consists of three main components: """Create a model reference for use in generate calls""" return f"myModel/{name}" ``` - - + + ## Advanced Model Features ### Supporting Tool Calling - - + + ```ts // In your model definition ai.defineModel({ @@ -567,8 +568,8 @@ A model plugin consists of three main components: }; }); ``` - - + + ```go // In your model generation function func(ctx context.Context, req *ai.ModelRequest, cb ai.ModelStreamCallback) (*ai.ModelResponse, error) { @@ -627,8 +628,8 @@ A model plugin consists of three main components: }, nil } ``` - - + + ```python async def _generate_text(self, request: Dict[str, Any]) -> Dict[str, Any]: """Generate text with tool calling support""" @@ -678,13 +679,13 @@ A model plugin consists of three main components: "usage": api_response.get("usage", {}), } ``` - - + + ### Supporting Media Input - - + + ```ts ai.defineModel({ name: 'myModel/vision-model', @@ -720,8 +721,8 @@ A model plugin consists of three main components: return transformResponse(apiResponse); }); ``` - - + + ```go // In your request transformation function func transformMessages(messages []*ai.Message) []APIMessage { @@ -767,8 +768,8 @@ A model plugin consists of three main components: Detail string `json:"detail"` } ``` - - + + ```python def _transform_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Transform messages with media support""" @@ -799,15 +800,15 @@ A model plugin consists of three main components: return api_messages ``` - - + + ## Best Practices ### Error Handling - - + + ```ts import { GenkitError } from 'genkit'; @@ -837,8 +838,8 @@ A model plugin consists of three main components: } } ``` - - + + ```go import "google.golang.org/grpc/codes" @@ -867,8 +868,8 @@ A model plugin consists of three main components: strings.Contains(err.Error(), "401") } ``` - - + + ```python import httpx from genkit.exceptions import GenkitError @@ -903,13 +904,13 @@ A model plugin consists of three main components: message=f"Unexpected error: {str(e)}" ) ``` - - + + ### Configuration Validation - - + + ```ts const MyModelConfigSchema = GenerationCommonConfigSchema.extend({ temperature: z.number().min(0).max(2).default(0.7), @@ -921,8 +922,8 @@ A model plugin consists of three main components: // In your generation function const config = MyModelConfigSchema.parse(request.config || {}); ``` - - + + ```go func validateConfig(config *MyModelConfig) error { if config.Temperature < 0 || config.Temperature > 2 { @@ -939,8 +940,8 @@ A model plugin consists of three main components: return nil, fmt.Errorf("invalid configuration: %w", err) } ``` - - + + ```python from pydantic import BaseModel, Field, validator @@ -955,15 +956,15 @@ A model plugin consists of three main components: raise ValueError('temperature must be between 0 and 2') return v ``` - - + + ## Testing Your Model Plugin ### Unit Testing - - + + ```ts import { describe, it, expect, beforeEach } from 'vitest'; import { genkit } from 'genkit'; @@ -1009,8 +1010,8 @@ A 
model plugin consists of three main components: }); }); ``` - - + + ```go package mymodelplugin_test @@ -1060,8 +1061,8 @@ A model plugin consists of three main components: }) } ``` - - + + ```python import pytest import asyncio @@ -1109,8 +1110,8 @@ A model plugin consists of three main components: assert "tool_calls" in response ``` - - + + ## Next Steps diff --git a/src/content/docs/unified-docs/plugin-authoring/overview.mdx b/src/content/docs/unified-docs/plugin-authoring/overview.mdx index f120fb90..8eef22d7 100644 --- a/src/content/docs/unified-docs/plugin-authoring/overview.mdx +++ b/src/content/docs/unified-docs/plugin-authoring/overview.mdx @@ -3,7 +3,8 @@ title: Writing Genkit Plugins description: Learn how to extend Genkit's capabilities by writing custom plugins across JavaScript, Go, and Python, covering plugin creation, models, retrievers, and publishing. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; Genkit's capabilities are designed to be extended by plugins. Genkit plugins are configurable modules that can provide models, retrievers, indexers, trace stores, and more. You've already seen plugins in action just by using Genkit - every AI provider, vector database, and framework integration is implemented as a plugin. @@ -16,8 +17,8 @@ Plugins in Genkit follow a consistent architecture across all languages, providi - **Extend functionality**: Add new capabilities to the Genkit ecosystem - **Maintain consistency**: Follow established patterns for reliability and discoverability - - + + In JavaScript, plugins are created using the `genkitPlugin` helper: ```ts @@ -30,8 +31,8 @@ Plugins in Genkit follow a consistent architecture across all languages, providi ``` The Vertex AI plugin takes configuration and registers models, embedders, and more with the Genkit registry, which powers the local UI and serves as a lookup service for named actions at runtime. - - + + In Go, plugins implement the `genkit.Plugin` interface: ```go @@ -52,8 +53,8 @@ Plugins in Genkit follow a consistent architecture across all languages, providi ``` Plugins register resources with unique identifiers to prevent naming conflicts with other plugins. - - + + In Python, plugins are classes that extend the base plugin functionality: ```python @@ -68,15 +69,15 @@ Plugins in Genkit follow a consistent architecture across all languages, providi ``` Python plugins follow similar patterns to JavaScript and Go, providing consistent APIs across languages. 
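However the class is structured internally, consuming it looks the same as using any first-party plugin: pass an instance to the `Genkit` constructor, and whatever it registered becomes addressable by its namespaced name. A sketch, where `MyPlugin` is the hypothetical class from the snippet above and the model name is likewise illustrative:

   ```python
   from genkit.ai import Genkit

   # MyPlugin is the hypothetical plugin class from the snippet above.
   ai = Genkit(plugins=[MyPlugin(api_key='...')])


   @ai.flow()
   async def hello(name: str) -> str:
       response = await ai.generate(
           model='my_plugin/some-model',  # hypothetical namespaced model name
           prompt=f'Say hello to {name}.',
       )
       return response.text
   ```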
- - + + ## Creating a Plugin ### Project Setup - - + + Create a new NPM package for your plugin: ```bash @@ -143,8 +144,8 @@ Plugins in Genkit follow a consistent architecture across all languages, providi ); } ``` - - + + Create a Go package that implements the `genkit.Plugin` interface: ```go @@ -204,8 +205,8 @@ Plugins in Genkit follow a consistent architecture across all languages, providi ), ) ``` - - + + Create a Python package with a plugin class: ```python @@ -264,8 +265,8 @@ Plugins in Genkit follow a consistent architecture across all languages, providi ], ) ``` - - + + ## Plugin Configuration Best Practices @@ -273,8 +274,8 @@ Plugins in Genkit follow a consistent architecture across all languages, providi For any plugin options that require secret values (API keys, tokens), provide both configuration options and environment variable defaults: - - + + ```ts interface MyPluginOptions { apiKey?: string; @@ -304,8 +305,8 @@ For any plugin options that require secret values (API keys, tokens), provide bo }); } ``` - - + + ```go type MyPlugin struct { APIKey string @@ -339,8 +340,8 @@ For any plugin options that require secret values (API keys, tokens), provide bo return g.DefineModel(/* ... */) } ``` - - + + ```python class MyPlugin(Plugin): def __init__( @@ -369,8 +370,8 @@ For any plugin options that require secret values (API keys, tokens), provide bo timeout=self.timeout ) ``` - - + + ## Plugin Types @@ -398,8 +399,8 @@ Configure observability and monitoring for Genkit applications. ### Package Naming and Keywords - - + + Use the `genkitx-{name}` naming convention and include relevant keywords in your `package.json`: ```json @@ -426,8 +427,8 @@ Configure observability and monitoring for Genkit applications. - `genkit-telemetry`: If your plugin provides telemetry - `genkit-deploy`: If your plugin includes deployment helpers - `genkit-flow`: If your plugin enhances flows - - + + Use descriptive package names that include "genkit" for discoverability: ``` @@ -440,8 +441,8 @@ Configure observability and monitoring for Genkit applications. - Configuration options - Usage examples - API documentation - - + + Use the `genkit-{name}` naming convention and include relevant classifiers in your `setup.py` or `pyproject.toml`: ```toml @@ -456,8 +457,8 @@ Configure observability and monitoring for Genkit applications. "Topic :: Scientific/Engineering :: Artificial Intelligence", ] ``` - - + + ## Next Steps diff --git a/src/content/docs/unified-docs/plugins/anthropic.mdx b/src/content/docs/unified-docs/plugins/anthropic.mdx index 10f45dea..ad4f1401 100644 --- a/src/content/docs/unified-docs/plugins/anthropic.mdx +++ b/src/content/docs/unified-docs/plugins/anthropic.mdx @@ -3,14 +3,15 @@ title: Anthropic (Claude) Plugin description: Learn how to use Anthropic's Claude models with Genkit across JavaScript, Go, and Python for advanced reasoning, analysis, and conversational AI. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; Anthropic's Claude models are known for their advanced reasoning capabilities, safety features, and nuanced understanding of complex topics. Claude excels at analysis, writing, math, coding, and thoughtful conversation while maintaining helpful, harmless, and honest interactions. ## Installation and Setup - - + + Claude models are available in JavaScript through Vertex AI Model Garden. 
You'll need access to Claude models in your Google Cloud project. Install the Vertex AI plugin: @@ -53,8 +54,8 @@ Anthropic's Claude models are known for their advanced reasoning capabilities, s - **claude-3-haiku**: Fast and efficient for simple tasks - **claude-3-sonnet**: Balanced performance and capability - **claude-3-opus**: Most capable for complex reasoning - - + + Claude models are available in Go through the OpenAI-compatible Anthropic plugin. Install the required packages: @@ -106,8 +107,8 @@ Anthropic's Claude models are known for their advanced reasoning capabilities, s - **claude-3-5-sonnet-20240620**: Balanced Claude 3.5 Sonnet - **claude-3-opus-20240229**: Most capable Claude 3 model - **claude-3-haiku-20240307**: Fastest Claude 3 model - - + + Claude models are currently not directly supported in Python Genkit. However, you can access Claude through: 1. **Vertex AI Model Garden** (if available in your region) @@ -139,15 +140,15 @@ Anthropic's Claude models are known for their advanced reasoning capabilities, s # or export ANTHROPIC_API_KEY=your_anthropic_api_key ``` - - + + ## Basic Usage ### Text Generation - - + + Use Claude models for text generation through Vertex AI: ```ts @@ -202,8 +203,8 @@ Anthropic's Claude models are known for their advanced reasoning capabilities, s }, ); ``` - - + + Use Claude models with the Anthropic plugin: ```go @@ -256,8 +257,8 @@ Anthropic's Claude models are known for their advanced reasoning capabilities, s fmt.Println(reasoningResp.Text()) } ``` - - + + Use Claude models through available integrations: ```python @@ -300,15 +301,15 @@ Anthropic's Claude models are known for their advanced reasoning capabilities, s ) return response.text ``` - - + + ## Advanced Features ### Complex Reasoning - - + + Leverage Claude's reasoning capabilities: ```ts @@ -407,8 +408,8 @@ Be thorough and consider multiple perspectives.`, }, ); ``` - - + + Leverage Claude's reasoning capabilities: ```go @@ -520,8 +521,8 @@ Be thorough and consider multiple perspectives.`, scenario) }, nil } ``` - - + + Leverage Claude's reasoning capabilities: ```python @@ -624,13 +625,13 @@ Be thorough and consider multiple perspectives.""" print(f"Ethical analysis failed: {error}") return {"ethical_considerations": [], "stakeholders": [], "recommendations": ""} ``` - - + + ### Conversational AI - - + + Build sophisticated conversational applications: ```ts @@ -675,8 +676,8 @@ Be thorough and consider multiple perspectives.""" }, ); ``` - - + + Build sophisticated conversational applications: ```go @@ -725,8 +726,8 @@ Be thorough and consider multiple perspectives.""" return resp.Text(), nil } ``` - - + + Build sophisticated conversational applications: ```python @@ -771,8 +772,8 @@ Be thorough and consider multiple perspectives.""" print(f"Conversation failed: {error}") return "I'm sorry, I couldn't process your message at the moment." ``` - - + + ## Model Comparison diff --git a/src/content/docs/unified-docs/plugins/deepseek.mdx b/src/content/docs/unified-docs/plugins/deepseek.mdx index 92f28aec..09d1fba9 100644 --- a/src/content/docs/unified-docs/plugins/deepseek.mdx +++ b/src/content/docs/unified-docs/plugins/deepseek.mdx @@ -3,14 +3,15 @@ title: DeepSeek Plugin description: Learn how to use DeepSeek's advanced AI models with Genkit across JavaScript, Go, and Python, including reasoning models, code generation, and cost-effective solutions. 
--- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; The DeepSeek plugin provides access to DeepSeek's powerful AI models, including their advanced reasoning models and cost-effective solutions. DeepSeek models are known for their strong performance in coding, mathematics, and reasoning tasks. ## Installation and Setup - - + + Install the DeepSeek plugin: ```bash @@ -45,8 +46,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including ``` Get your API key from [DeepSeek Platform](https://platform.deepseek.com/). - - + + For Go applications, use the OpenAI-compatible client with DeepSeek endpoints: ```go @@ -82,8 +83,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including ```bash export DEEPSEEK_API_KEY=your_deepseek_api_key ``` - - + + For Python applications, use the OpenAI-compatible client: ```bash @@ -112,15 +113,15 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including ```bash export DEEPSEEK_API_KEY=your_deepseek_api_key ``` - - + + ## Basic Usage ### Text Generation - - + + Use DeepSeek models for text generation: ```ts @@ -165,8 +166,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including }, }); ``` - - + + Use DeepSeek models with the generation API: ```go @@ -206,8 +207,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including fmt.Println(reasoningResp.Text()) } ``` - - + + Use DeepSeek models with the generation API: ```python @@ -243,13 +244,13 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including ) print(reasoning_response.text) ``` - - + + ### Code Generation - - + + Use DeepSeek for code generation and programming tasks: ```ts @@ -305,8 +306,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including }, ); ``` - - + + Use DeepSeek for code generation and programming tasks: ```go @@ -348,8 +349,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including return resp.Text(), nil } ``` - - + + Use DeepSeek for code generation and programming tasks: ```python @@ -387,15 +388,15 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including return {'review': review, 'suggestions': suggestions} ``` - - + + ## Advanced Features ### Mathematical Reasoning - - + + Leverage DeepSeek's mathematical reasoning capabilities: ```ts @@ -455,8 +456,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including }, ); ``` - - + + Leverage DeepSeek's mathematical reasoning capabilities: ```go @@ -519,8 +520,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including return resp.Text(), nil } ``` - - + + Leverage DeepSeek's mathematical reasoning capabilities: ```python @@ -561,13 +562,13 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including return response.text ``` - - + + ### Conversational AI - - + + Build conversational applications with DeepSeek: ```ts @@ -649,8 +650,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including }, ); ``` - - + + Build conversational applications with DeepSeek: ```go @@ -699,8 +700,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including return resp.Text(), nil } ``` - - + + Build conversational applications with DeepSeek: ```python @@ -743,8 +744,8 @@ The DeepSeek plugin 
provides access to DeepSeek's powerful AI models, including return response.text ``` - - + + ## Model Comparison @@ -758,8 +759,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including ### Performance Characteristics - - + + ```ts // Performance comparison example const performanceTest = async () => { @@ -785,8 +786,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including console.log(`General length: ${generalResponse.text.length}, Coder length: ${coderResponse.text.length}`); }; ``` - - + + ```go func performanceTest(ctx context.Context) { prompt := "Explain the time complexity of quicksort algorithm" @@ -812,8 +813,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including len(generalResp.Text()), len(coderResp.Text())) } ``` - - + + ```python import time @@ -839,15 +840,15 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including print(f"General: {general_time:.2f}s, Coder: {coder_time:.2f}s") print(f"General length: {len(general_response.text)}, Coder length: {len(coder_response.text)}") ``` - - + + ## Advanced Configuration ### Custom Model Configuration - - + + ```ts // Advanced configuration with passthrough options const response = await ai.generate({ @@ -882,8 +883,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including const config = environmentConfig[process.env.NODE_ENV || 'development']; ``` - - + + ```go // Advanced configuration resp, err := genkit.Generate(ctx, g, @@ -901,8 +902,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including }), ) ``` - - + + ```python # Advanced configuration response = await ai.generate( @@ -920,8 +921,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including } ) ``` - - + + ## Best Practices @@ -941,8 +942,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including ### Error Handling - - + + ```ts const robustDeepSeekFlow = ai.defineFlow( { @@ -978,8 +979,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including }, ); ``` - - + + ```go func robustDeepSeekGenerate(ctx context.Context, query string) (string, error) { resp, err := genkit.Generate(ctx, g, @@ -1012,8 +1013,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including return resp.Text(), nil } ``` - - + + ```python async def robust_deepseek_generate(query: str) -> str: try: @@ -1039,8 +1040,8 @@ The DeepSeek plugin provides access to DeepSeek's powerful AI models, including return fallback_response.text raise error ``` - - + + ## Next Steps diff --git a/src/content/docs/unified-docs/plugins/google-ai.mdx b/src/content/docs/unified-docs/plugins/google-ai.mdx index 4b4ecec7..3c27e258 100644 --- a/src/content/docs/unified-docs/plugins/google-ai.mdx +++ b/src/content/docs/unified-docs/plugins/google-ai.mdx @@ -3,14 +3,15 @@ title: Google AI plugin description: Learn how to use Google's Gemini models with Genkit across JavaScript, Go, and Python, including text generation, embeddings, TTS, video generation, and context caching. 
---
-import { Tabs, TabItem } from '@astrojs/starlight/components';
+import LangTabs from '@/components/LangTabs.astro';
+import LangTabItem from '@/components/LangTabItem.astro';

The Google AI plugin provides interfaces to Google's Gemini models through the [Gemini API](https://ai.google.dev/docs/gemini_api_overview), offering powerful text generation, embeddings, text-to-speech, video generation, and context caching capabilities.

## Installation and Setup

- - + + Install the Google AI plugin: ```bash @@ -27,8 +28,8 @@ plugins: [googleAI()], }); ```
- - + + The Google AI plugin is included with the Genkit Go package: ```go @@ -47,8 +48,8 @@ } } ```
- - + + Install the Google AI plugin: ```bash @@ -65,15 +66,15 @@ plugins=[GoogleGenai()], ) ```
- - + +

## API Key Configuration

The plugin requires an API key for the Gemini API, which you can get from [Google AI Studio](https://aistudio.google.com/app/apikey).

- - + + Configure your API key by doing one of the following: - Set the `GEMINI_API_KEY` environment variable: @@ -89,8 +90,8 @@ :::caution Don't embed your API key directly in code! Use environment variables or a service like Cloud Secret Manager. :::
- - + + Set the `GEMINI_API_KEY` environment variable: ```bash @@ -98,8 +99,8 @@ ``` The plugin will automatically use this environment variable.
- - + + Set the `GEMINI_API_KEY` environment variable: ```bash @@ -107,13 +108,13 @@ ``` The plugin will automatically use this environment variable.
- - + +

## Basic Usage

- - + + Use the helper functions to reference models and embedders: ```ts @@ -141,8 +142,8 @@ content: 'Hello world', }); ```
- - + + Use the models directly with the generation API: ```go @@ -174,8 +175,8 @@ fmt.Println(resp.Text()) } ```
- - + + Use the models with the generation API: ```python @@ -197,13 +198,13 @@ content='Hello world', ) ```
- - + +

## Working with Files

- - + + You can use files uploaded to the Gemini Files API: ```ts @@ -228,8 +229,8 @@ ], }); ```
- - + + File handling in Go requires using the Google AI SDK directly for file uploads, then referencing the files in Genkit: ```go @@ -244,8 +245,8 @@ }), ) ```
- - + + File handling in Python requires using the Google AI SDK for uploads: ```python @@ -265,13 +266,13 @@ model=google_genai_name('gemini-2.5-flash'), ) ```
- - + +

## Fine-tuned Models

- - + + You can use models fine-tuned with the Google Gemini API. Follow the instructions from the [Gemini API](https://ai.google.dev/gemini-api/docs/model-tuning/tutorial?lang=python) or fine-tune using [AI Studio](https://aistudio.google.com/app/tune).
When calling a tuned model, use the tuned model's ID directly: @@ -282,8 +283,8 @@ The plugin requires an API key for the Gemini API, which you can get from [Googl model: googleAI.model('tunedModels/my-example-model-apbm8oqbvuv2'), }); ``` - - + + Use fine-tuned models by specifying the tuned model ID: ```go @@ -292,8 +293,8 @@ The plugin requires an API key for the Gemini API, which you can get from [Googl ai.WithModelName("googleai/tunedModels/my-example-model-apbm8oqbvuv2"), ) ``` - - + + Use fine-tuned models by specifying the tuned model ID: ```python @@ -302,13 +303,13 @@ The plugin requires an API key for the Gemini API, which you can get from [Googl model=google_genai_name('tunedModels/my-example-model-apbm8oqbvuv2'), ) ``` - - + + ## Text-to-Speech (TTS) - - + + Generate audio using the Gemini TTS model: ```ts @@ -369,8 +370,8 @@ The plugin requires an API key for the Gemini API, which you can get from [Googl Speaker2: "I thought it was a framework."`, }); ``` - - + + Text-to-speech functionality is currently available primarily in JavaScript. For Go applications, you would need to: 1. Use the Google AI SDK directly for TTS functionality @@ -382,8 +383,8 @@ The plugin requires an API key for the Gemini API, which you can get from [Googl // Consider using Google Cloud Text-to-Speech API or // a JavaScript service for TTS functionality ``` - - + + Text-to-speech functionality is currently available primarily in JavaScript. For Python applications, you would need to: 1. Use the Google AI SDK directly for TTS functionality @@ -395,13 +396,13 @@ The plugin requires an API key for the Gemini API, which you can get from [Googl # Consider using Google Cloud Text-to-Speech API or # a JavaScript service for TTS functionality ``` - - + + ## Video Generation (Veo) - - + + Generate videos using the Veo models: ```ts @@ -461,8 +462,8 @@ The plugin requires an API key for the Gemini API, which you can get from [Googl }, }); ``` - - + + Video generation functionality is currently available primarily in JavaScript. For Go applications, you would need to: 1. Use the Google AI SDK directly for video generation @@ -474,8 +475,8 @@ The plugin requires an API key for the Gemini API, which you can get from [Googl // Consider using the Google AI SDK directly or // a JavaScript service for video generation functionality ``` - - + + Video generation functionality is currently available primarily in JavaScript. For Python applications, you would need to: 1. Use the Google AI SDK directly for video generation @@ -487,13 +488,13 @@ The plugin requires an API key for the Gemini API, which you can get from [Googl # Consider using the Google AI SDK directly or # a JavaScript service for video generation functionality ``` - - + + ## Context Caching - - + + Context caching allows models to reuse previously cached content to optimize performance: ```ts @@ -551,8 +552,8 @@ The plugin requires an API key for the Gemini API, which you can get from [Googl prompt: 'Analyze the relationship between Pierre and Natasha.', }); ``` - - + + Context caching functionality is currently available primarily in JavaScript. For Go applications, you would need to implement caching manually or use the Google AI SDK directly. ```go @@ -560,8 +561,8 @@ The plugin requires an API key for the Gemini API, which you can get from [Googl // Consider implementing your own caching layer or // using the Google AI SDK directly for caching functionality ``` - - + + Context caching functionality is currently available primarily in JavaScript. 
For Python applications, you would need to implement caching manually or use the Google AI SDK directly. ```python @@ -569,8 +570,8 @@ The plugin requires an API key for the Gemini API, which you can get from [Googl # Consider implementing your own caching layer or # using the Google AI SDK directly for caching functionality ``` - - + + ## Available Models diff --git a/src/content/docs/unified-docs/plugins/mcp.mdx b/src/content/docs/unified-docs/plugins/mcp.mdx index 8ab8f73f..b45c9d49 100644 --- a/src/content/docs/unified-docs/plugins/mcp.mdx +++ b/src/content/docs/unified-docs/plugins/mcp.mdx @@ -3,7 +3,8 @@ title: Model Context Protocol (MCP) Plugin description: Learn how to integrate MCP servers with Genkit across JavaScript, Go, and Python for extensible tool and resource management. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; The Model Context Protocol (MCP) plugin enables integration between Genkit and the [Model Context Protocol](https://modelcontextprotocol.io), an open standard for connecting AI applications with external tools, resources, and prompts. MCP allows you to: @@ -13,8 +14,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t ## Installation and Setup - - + + Install the MCP plugin: ```bash @@ -47,8 +48,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t }, }); ``` - - + + Import the MCP package: ```bash @@ -95,8 +96,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t } } ``` - - + + Install the MCP plugin: ```bash @@ -132,15 +133,15 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t if __name__ == "__main__": asyncio.run(main()) ``` - - + + ## MCP Client Usage ### Connecting to MCP Servers - - + + #### Multiple Servers with MCP Host ```ts @@ -208,8 +209,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t await fsClient.disable(); ``` - - + + #### Multiple Servers with Manager ```go @@ -289,8 +290,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t log.Fatal(err) } ``` - - + + #### Multiple Servers ```python @@ -354,13 +355,13 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t await client.disconnect() ``` - - + + ### Using MCP Tools and Resources - - + + ```ts // Get tools from specific servers const fsTools = await mcpHost.getActiveTools(ai, ['fs']); @@ -386,8 +387,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t // Tool responses are automatically processed // Raw responses can be enabled with rawToolResponses: true ``` - - + + ```go // Get tools from specific server timeTool, err := manager.GetTool(ctx, g, "time", "get_current_time") @@ -435,8 +436,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t log.Fatal(err) } ``` - - + + ```python # Get tools from specific servers fs_tools = await mcp_plugin.get_tools_from_server("filesystem") @@ -468,15 +469,15 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t # Disconnect server await mcp_plugin.disconnect_server("weather") ``` - - + + ## MCP Server Usage ### Exposing Genkit as MCP Server - - + + ```ts import { createMcpServer } from '@genkit-ai/mcp'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; @@ -539,8 +540,8 @@ The 
Model Context Protocol (MCP) plugin enables integration between Genkit and t await server.server?.connect(transport); }); ``` - - + + ```go package main @@ -590,8 +591,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t } } ``` - - + + ```python import asyncio from genkit.ai import Genkit @@ -645,15 +646,15 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t if __name__ == "__main__": asyncio.run(main()) ``` - - + + ## Advanced Configuration ### Transport Options - - + + ```ts // Stdio transport (default) const mcpHost = createMcpHost({ @@ -692,8 +693,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t }, }); ``` - - + + ```go // Stdio transport client, err := mcp.NewGenkitMCPClient(mcp.MCPClientOptions{ @@ -722,8 +723,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t }, }) ``` - - + + ```python # Stdio transport mcp_plugin = MCPPlugin( @@ -761,13 +762,13 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t }, ) ``` - - + + ### Error Handling and Lifecycle Management - - + + ```ts const mcpHost = createMcpHost({ name: 'robust-mcp-client', @@ -803,8 +804,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t await mcpHost.close(); } ``` - - + + ```go manager, err := mcp.NewMCPManager(mcp.MCPManagerOptions{ Name: "robust-app", @@ -850,8 +851,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t return } ``` - - + + ```python async def robust_mcp_usage(): mcp_plugin = MCPPlugin( @@ -887,15 +888,15 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t # Clean up connections await mcp_plugin.close() ``` - - + + ## Testing and Development ### Testing Your MCP Server - - + + ```bash # Test with MCP Inspector npx @modelcontextprotocol/inspector node dist/server.js @@ -928,8 +929,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t await client.disable(); ``` - - + + ```bash # Build your server go build -o server main.go @@ -976,8 +977,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t client.Close() } ``` - - + + ```bash # Test with MCP Inspector npx @modelcontextprotocol/inspector python server.py @@ -1014,8 +1015,8 @@ The Model Context Protocol (MCP) plugin enables integration between Genkit and t if __name__ == "__main__": asyncio.run(test_server()) ``` - - + + ## Best Practices diff --git a/src/content/docs/unified-docs/plugins/ollama.mdx b/src/content/docs/unified-docs/plugins/ollama.mdx index f3458f89..6d3335b1 100644 --- a/src/content/docs/unified-docs/plugins/ollama.mdx +++ b/src/content/docs/unified-docs/plugins/ollama.mdx @@ -3,7 +3,8 @@ title: Ollama plugin description: Learn how to use Ollama for local AI models with Genkit across JavaScript, Go, and Python, including setup, configuration, and usage for both text generation and embeddings. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; The Ollama plugin provides interfaces to local LLMs supported by [Ollama](https://ollama.com/), enabling you to run powerful AI models locally without requiring cloud API keys or internet connectivity. 
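Before diving into per-language setup, it may help to see the end-to-end shape of an Ollama-backed call. The following is a minimal JavaScript sketch; the `genkitx-ollama` package name and config shape are assumptions here, so treat the setup section below as authoritative, and note that the `gemma2` model must already be pulled locally:

```ts
import { genkit } from 'genkit';
import { ollama } from 'genkitx-ollama';

const ai = genkit({
  plugins: [
    ollama({
      // Models must already be available locally, e.g. `ollama pull gemma2`.
      models: [{ name: 'gemma2' }],
      serverAddress: 'http://127.0.0.1:11434', // Ollama's default local address
    }),
  ],
});

const { text } = await ai.generate({
  model: 'ollama/gemma2',
  prompt: 'Why is the sky blue?',
});
console.log(text);
```

Everything in this sketch runs on your machine; no API key is involved.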
@@ -22,8 +23,8 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo ## Installation and Setup - - + + Install the Ollama plugin: ```bash @@ -54,8 +55,8 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo ], }); ``` - - + + The Ollama plugin is available through the Ollama package: ```go @@ -81,8 +82,8 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo } } ``` - - + + Install the Ollama plugin: ```bash @@ -105,13 +106,13 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo )], ) ``` - - + + ## Basic Usage - - + + Use Ollama models for text generation: ```ts @@ -149,8 +150,8 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo }, ); ``` - - + + Use Ollama models with the generation API: ```go @@ -187,8 +188,8 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo fmt.Println(resp.Text()) } ``` - - + + Use Ollama models with the generation API: ```python @@ -220,13 +221,13 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo } ) ``` - - + + ## Embeddings - - + + Use Ollama for text embeddings: ```ts @@ -281,8 +282,8 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo }, ); ``` - - + + Generate embeddings using Ollama models: ```go @@ -306,8 +307,8 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo fmt.Printf("Generated %d-dimensional embedding\n", len(embeddings)) ``` - - + + Generate embeddings using Ollama models: ```python @@ -329,13 +330,13 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo print(f"Generated {len(embeddings)}-dimensional embedding") ``` - - + + ## Authentication and Remote Deployments - - + + For remote Ollama deployments that require authentication: ### Static Headers @@ -400,8 +401,8 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo plugins: [ollama(ollamaConfig)], }); ``` - - + + For remote Ollama deployments with authentication: ```go @@ -418,8 +419,8 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo }), ) ``` - - + + For remote Ollama deployments with authentication: ```python @@ -436,15 +437,15 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo )], ) ``` - - + + ## Model Configuration ### Model Types - - + + Configure different model types for different use cases: ```ts @@ -468,8 +469,8 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo ], }); ``` - - + + Configure different model types: ```go @@ -486,8 +487,8 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo }), ) ``` - - + + Configure different model types: ```python @@ -504,8 +505,8 @@ Before using the Ollama plugin, you need to install and run the Ollama server lo )], ) ``` - - + + ## Popular Models diff --git a/src/content/docs/unified-docs/plugins/openai.mdx b/src/content/docs/unified-docs/plugins/openai.mdx index a090daea..31ea1d8b 100644 --- a/src/content/docs/unified-docs/plugins/openai.mdx +++ b/src/content/docs/unified-docs/plugins/openai.mdx @@ -3,14 +3,15 @@ title: OpenAI plugin description: Learn how to use OpenAI models with Genkit across JavaScript, Go, and Python, including GPT models, DALL-E image generation, Whisper transcription, and text-to-speech capabilities. 
--- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; The OpenAI plugin provides access to OpenAI's powerful AI models, including GPT for text generation, DALL-E for image generation, Whisper for speech transcription, and text-to-speech models. ## Installation and Setup - - + + Install the OpenAI plugin: ```bash @@ -31,8 +32,8 @@ The OpenAI plugin provides access to OpenAI's powerful AI models, including GPT :::note The OpenAI plugin is built on top of the `openAICompatible` plugin and is pre-configured for OpenAI's API endpoints. ::: - - + + The OpenAI plugin is available through the OpenAI-compatible plugin: ```go @@ -54,8 +55,8 @@ The OpenAI plugin provides access to OpenAI's powerful AI models, including GPT } } ``` - - + + Install the OpenAI plugin: ```bash @@ -72,15 +73,15 @@ The OpenAI plugin provides access to OpenAI's powerful AI models, including GPT plugins=[OpenAI()], ) ``` - - + + ## API Key Configuration The plugin requires an API key for the OpenAI API, which you can get from the [OpenAI Platform](https://platform.openai.com/api-keys). - - + + Configure your API key by doing one of the following: - Set the `OPENAI_API_KEY` environment variable: @@ -96,8 +97,8 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O :::caution Don't embed your API key directly in code! Use environment variables or a service like Google Cloud Secret Manager. ::: - - + + Set the `OPENAI_API_KEY` environment variable: ```bash @@ -113,8 +114,8 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O }), ) ``` - - + + Set the `OPENAI_API_KEY` environment variable: ```bash @@ -122,13 +123,13 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O ``` The plugin will automatically use this environment variable. 
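Whichever language you use, avoid hardcoding the key in source. If you need to supply it explicitly, for example after fetching it from a secret manager, the JavaScript plugin accepts it at initialization. A minimal sketch follows; the import path and the `apiKey` option name are assumptions consistent with the plugin described above:

```ts
import { genkit } from 'genkit';
import { openAI } from '@genkit-ai/compat-oai/openai';

const apiKey = process.env.OPENAI_API_KEY;
if (!apiKey) {
  // Fail fast with a clear error instead of a confusing API failure later.
  throw new Error('OPENAI_API_KEY is not set');
}

const ai = genkit({
  plugins: [openAI({ apiKey })],
});
```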
- - + + ## Text Generation - - + + Use OpenAI's GPT models for text generation: ```ts @@ -171,8 +172,8 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O }, ); ``` - - + + Use OpenAI models with the generation API: ```go @@ -204,8 +205,8 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O fmt.Println(resp.Text()) } ``` - - + + Use OpenAI models with the generation API: ```python @@ -232,13 +233,13 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O } ) ``` - - + + ## Image Generation - - + + Generate images using DALL-E models: ```ts @@ -265,29 +266,29 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O }, }); ``` - - + + Image generation requires custom implementation using the OpenAI API: ```go // Image generation requires custom implementation // Use the OpenAI Go SDK directly for DALL-E functionality ``` - - + + Image generation requires custom implementation using the OpenAI API: ```python # Image generation requires custom implementation # Use the OpenAI Python SDK directly for DALL-E functionality ``` - - + + ## Text Embeddings - - + + Generate text embeddings for vector search and similarity: ```ts @@ -327,8 +328,8 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O ], }); ``` - - + + Generate embeddings using OpenAI models: ```go @@ -343,8 +344,8 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O fmt.Printf("Generated %d-dimensional embedding\n", len(embeddings)) ``` - - + + Generate embeddings using OpenAI models: ```python @@ -356,15 +357,15 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O print(f"Generated {len(embeddings)}-dimensional embedding") ``` - - + + ## Audio Processing ### Speech-to-Text (Whisper) - - + + Transcribe audio files using Whisper: ```ts @@ -405,29 +406,29 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O ], }); ``` - - + + Audio transcription requires custom implementation using the OpenAI API: ```go // Audio transcription requires custom implementation // Use the OpenAI Go SDK directly for Whisper functionality ``` - - + + Audio transcription requires custom implementation using the OpenAI API: ```python # Audio transcription requires custom implementation # Use the OpenAI Python SDK directly for Whisper functionality ``` - - + + ### Text-to-Speech - - + + Generate speech from text: ```ts @@ -459,31 +460,31 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O }, }); ``` - - + + Text-to-speech requires custom implementation using the OpenAI API: ```go // Text-to-speech requires custom implementation // Use the OpenAI Go SDK directly for TTS functionality ``` - - + + Text-to-speech requires custom implementation using the OpenAI API: ```python # Text-to-speech requires custom implementation # Use the OpenAI Python SDK directly for TTS functionality ``` - - + + ## Advanced Features ### Web Search Integration - - + + Some OpenAI models support web search capabilities: ```ts @@ -497,29 +498,29 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O }, }); ``` - - + + Web search integration requires custom implementation: ```go // Web search requires custom implementation // Use the OpenAI API directly for search-enabled models ``` - - + + Web search integration requires custom implementation: ```python # Web search requires custom 
implementation # Use the OpenAI API directly for search-enabled models ``` - - + + ### Function Calling - - + + OpenAI models support function calling for tool integration: ```ts @@ -544,29 +545,29 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O tools: [weatherTool], }); ``` - - + + Function calling is supported through Genkit's tool system: ```go // Define tools and use with OpenAI models // See tool calling documentation for implementation details ``` - - + + Function calling is supported through Genkit's tool system: ```python # Define tools and use with OpenAI models # See tool calling documentation for implementation details ``` - - + + ### Passthrough Configuration - - + + Access new models and features without updating Genkit: ```ts @@ -583,24 +584,24 @@ The plugin requires an API key for the OpenAI API, which you can get from the [O ``` Genkit passes this config as-is to the OpenAI API, giving you access to new model features. - - + + Passthrough configuration allows access to new OpenAI features: ```go // Custom configuration can be passed through to the OpenAI API // See OpenAI Go SDK documentation for available options ``` - - + + Passthrough configuration allows access to new OpenAI features: ```python # Custom configuration can be passed through to the OpenAI API # See OpenAI Python SDK documentation for available options ``` - - + + ## Available Models diff --git a/src/content/docs/unified-docs/plugins/vertex-ai.mdx b/src/content/docs/unified-docs/plugins/vertex-ai.mdx index 720f23a3..29d13cc1 100644 --- a/src/content/docs/unified-docs/plugins/vertex-ai.mdx +++ b/src/content/docs/unified-docs/plugins/vertex-ai.mdx @@ -3,7 +3,8 @@ title: Vertex AI plugin description: Learn how to use Google Cloud Vertex AI with Genkit across JavaScript, Go, and Python, including Gemini models, Imagen image generation, evaluation metrics, Vector Search, and text-to-speech capabilities. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; The Vertex AI plugin provides interfaces to several Google Cloud AI services, offering enterprise-grade AI capabilities with advanced features like grounding, evaluation metrics, and vector search. @@ -19,8 +20,8 @@ The Vertex AI plugin provides access to: ## Installation and Setup - - + + Install the Vertex AI plugin: ```bash @@ -39,8 +40,8 @@ The Vertex AI plugin provides access to: plugins: [vertexAI({ location: 'us-central1' })], }); ``` - - + + The Vertex AI plugin is included with the Genkit Go package: ```go @@ -63,8 +64,8 @@ The Vertex AI plugin provides access to: } } ``` - - + + Vertex AI support in Python is available through the Google Cloud plugin: ```bash @@ -84,13 +85,13 @@ The Vertex AI plugin provides access to: )], ) ``` - - + + ## Authentication and Configuration - - + + The plugin requires: 1. 
**Google Cloud project ID**: Set via `projectId` in configuration or `GCLOUD_PROJECT` environment variable @@ -107,8 +108,8 @@ The Vertex AI plugin provides access to: - Other environments: see [Application Default Credentials](https://cloud.google.com/docs/authentication/provide-credentials-adc) docs **Required IAM role:** Vertex AI User (`roles/aiplatform.user`) - - + + Configure authentication and project settings: ```go @@ -126,8 +127,8 @@ The Vertex AI plugin provides access to: ``` **Required IAM role:** Vertex AI User (`roles/aiplatform.user`) - - + + Configure the plugin with your project details: ```python @@ -145,13 +146,13 @@ The Vertex AI plugin provides access to: ``` **Required IAM role:** Vertex AI User (`roles/aiplatform.user`) - - + + ## Basic Usage - - + + Use Vertex AI models for text generation: ```ts @@ -172,8 +173,8 @@ The Vertex AI plugin provides access to: content: 'How many widgets do you have in stock?', }); ``` - - + + Use Vertex AI models with the generation API: ```go @@ -208,8 +209,8 @@ The Vertex AI plugin provides access to: fmt.Println(resp.Text()) } ``` - - + + Use Vertex AI models with the generation API: ```python @@ -234,15 +235,15 @@ The Vertex AI plugin provides access to: content='How many widgets do you have in stock?', ) ``` - - + + ## Advanced Features ### Grounding with Google Search and Private Data - - + + Ground Gemini responses using Google Search or your own data: ```ts @@ -277,29 +278,29 @@ The Vertex AI plugin provides access to: :::caution[Pricing] Vertex AI charges additional fees for grounding requests. See [Vertex AI pricing](https://cloud.google.com/vertex-ai/generative-ai/pricing) for details. ::: - - + + Grounding functionality is available through the Vertex AI API. Implement using the Google Cloud SDK directly or through custom configuration: ```go // Grounding requires custom implementation using the Vertex AI API // See Google Cloud documentation for grounding configuration ``` - - + + Grounding functionality is available through the Vertex AI API. Implement using the Google Cloud SDK directly: ```python # Grounding requires custom implementation using the Vertex AI API # See Google Cloud documentation for grounding configuration ``` - - + + ### Image Generation with Imagen - - + + Generate images from text prompts: ```ts @@ -332,29 +333,29 @@ The Vertex AI plugin provides access to: }, }); ``` - - + + Image generation is available through the Vertex AI API. Implement using the Google Cloud SDK: ```go // Image generation requires custom implementation using the Vertex AI API // See Vertex AI Imagen documentation for implementation details ``` - - + + Image generation is available through the Vertex AI API. 
Implement using the Google Cloud SDK: ```python # Image generation requires custom implementation using the Vertex AI API # See Vertex AI Imagen documentation for implementation details ``` - - + + ### Multimodal Embeddings - - + + Generate embeddings from text, images, and video: ```ts @@ -379,8 +380,8 @@ The Vertex AI plugin provides access to: }, }); ``` - - + + Generate embeddings using Vertex AI models: ```go @@ -393,8 +394,8 @@ The Vertex AI plugin provides access to: log.Fatal(err) } ``` - - + + Generate embeddings using Vertex AI models: ```python @@ -404,13 +405,13 @@ The Vertex AI plugin provides access to: content='How many widgets do you have in stock?', ) ``` - - + + ## Model Garden Integration - - + + Access third-party models through Vertex AI Model Garden: ### Claude 3 Models @@ -475,29 +476,29 @@ The Vertex AI plugin provides access to: }, }); ``` - - + + Model Garden integration requires custom implementation using the Vertex AI API: ```go // Model Garden models require custom implementation // See Vertex AI Model Garden documentation for setup ``` - - + + Model Garden integration requires custom implementation using the Vertex AI API: ```python # Model Garden models require custom implementation # See Vertex AI Model Garden documentation for setup ``` - - + + ## Evaluation Metrics - - + + Use Vertex AI Rapid Evaluation API for model evaluation: ```ts @@ -534,29 +535,29 @@ The Vertex AI plugin provides access to: genkit eval:run genkit eval:flow -e vertexai/safety ``` - - + + Evaluation metrics are available through the Vertex AI API: ```go // Evaluation requires custom implementation using the Vertex AI API // See Vertex AI Rapid Evaluation documentation ``` - - + + Evaluation metrics are available through the Vertex AI API: ```python # Evaluation requires custom implementation using the Vertex AI API # See Vertex AI Rapid Evaluation documentation ``` - - + + ## Vector Search - - + + Use Vertex AI Vector Search for enterprise-grade vector operations: ### Setup @@ -620,29 +621,29 @@ The Vertex AI plugin provides access to: :::caution[Pricing] Vector Search has both ingestion and hosting costs. See [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing#vectorsearch) for details. 
::: - - + + Vector Search integration requires custom implementation using the Vertex AI API: ```go // Vector Search requires custom implementation // See Vertex AI Vector Search documentation for setup ``` - - + + Vector Search integration requires custom implementation using the Vertex AI API: ```python # Vector Search requires custom implementation # See Vertex AI Vector Search documentation for setup ``` - - + + ## Text-to-Speech - - + + Generate high-quality speech from text: ```ts @@ -701,29 +702,29 @@ The Vertex AI plugin provides access to: Speaker2: "I thought it was a framework."`, }); ``` - - + + Text-to-speech functionality requires custom implementation using the Vertex AI API: ```go // TTS requires custom implementation using the Vertex AI API // See Vertex AI Speech Generation documentation ``` - - + + Text-to-speech functionality requires custom implementation using the Vertex AI API: ```python # TTS requires custom implementation using the Vertex AI API # See Vertex AI Speech Generation documentation ``` - - + + ## Context Caching - - + + Optimize performance with context caching for large inputs: ```ts @@ -753,24 +754,24 @@ The Vertex AI plugin provides access to: ``` **Supported models**: `gemini-2.5-flash-001`, `gemini-2.0-pro-001` - - + + Context caching requires custom implementation using the Vertex AI API: ```go // Context caching requires custom implementation // See Vertex AI Context Caching documentation ``` - - + + Context caching requires custom implementation using the Vertex AI API: ```python # Context caching requires custom implementation # See Vertex AI Context Caching documentation ``` - - + + ## Available Models diff --git a/src/content/docs/unified-docs/plugins/xai.mdx b/src/content/docs/unified-docs/plugins/xai.mdx index 40da4136..87fee3bd 100644 --- a/src/content/docs/unified-docs/plugins/xai.mdx +++ b/src/content/docs/unified-docs/plugins/xai.mdx @@ -3,14 +3,15 @@ title: xAI (Grok) Plugin description: Learn how to use xAI's Grok models with Genkit across JavaScript, Go, and Python, including text generation, image generation, and advanced configuration. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; The xAI plugin provides access to xAI's powerful Grok family of models, including advanced text generation and image generation capabilities. Grok models are known for their real-time information access and conversational abilities. ## Installation and Setup - - + + Install the xAI plugin: ```bash @@ -45,8 +46,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin ``` Get your API key from [xAI Console](https://console.x.ai/). 
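With the key exported, a quick smoke test confirms the plugin is wired up correctly. This is a sketch only: the import path, the `xAI.model()` helper, and the `grok-3-mini` model name are assumptions, so check the model list later on this page for what is actually available:

```ts
import { genkit } from 'genkit';
import { xAI } from '@genkit-ai/compat-oai/xai'; // import path is an assumption

const ai = genkit({ plugins: [xAI()] });

const { text } = await ai.generate({
  model: xAI.model('grok-3-mini'), // model name is illustrative
  prompt: 'In one sentence, what can Grok do?',
});
console.log(text);
```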
- - + + For Go applications, use the OpenAI-compatible client with xAI endpoints: ```go @@ -82,8 +83,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin ```bash export XAI_API_KEY=your_xai_api_key ``` - - + + For Python applications, use the OpenAI-compatible client: ```bash @@ -112,15 +113,15 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin ```bash export XAI_API_KEY=your_xai_api_key ``` - - + + ## Basic Usage ### Text Generation - - + + Use Grok models for text generation: ```ts @@ -165,8 +166,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin }, }); ``` - - + + Use Grok models with the generation API: ```go @@ -206,8 +207,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin fmt.Println(newsResp.Text()) } ``` - - + + Use Grok models with the generation API: ```python @@ -243,13 +244,13 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin ) print(news_response.text) ``` - - + + ### Image Generation - - + + Use Grok for image generation: ```ts @@ -292,8 +293,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin }, ); ``` - - + + Use Grok for image generation: ```go @@ -316,8 +317,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin fmt.Printf("Generated image URL: %s\n", imageResp.Media().URL) } ``` - - + + Use Grok for image generation: ```python @@ -336,15 +337,15 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin if image_response.media: print(f"Generated image URL: {image_response.media.url}") ``` - - + + ## Advanced Features ### Real-time Information Access - - + + Leverage Grok's real-time information capabilities: ```ts @@ -388,8 +389,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin }, ); ``` - - + + Leverage Grok's real-time information capabilities: ```go @@ -431,8 +432,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin return resp.Text(), nil } ``` - - + + Leverage Grok's real-time information capabilities: ```python @@ -460,13 +461,13 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin ) return response.text ``` - - + + ### Conversational AI - - + + Build conversational applications with Grok: ```ts @@ -535,8 +536,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin }, ); ``` - - + + Build conversational applications with Grok: ```go @@ -568,8 +569,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin return resp.Text(), nil } ``` - - + + Build conversational applications with Grok: ```python @@ -597,8 +598,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin return response.text ``` - - + + ## Model Comparison @@ -612,8 +613,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin ### Performance Characteristics - - + + ```ts // Performance comparison example const performanceTest = async () => { @@ -639,8 +640,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin console.log(`Mini length: ${miniResponse.text.length}, Full length: ${fullResponse.text.length}`); }; ``` - - + + ```go func performanceTest(ctx context.Context) { prompt := "Explain the impact of AI on modern society" @@ -666,8 +667,8 @@ The xAI plugin provides access to xAI's powerful Grok family 
of models, includin len(miniResp.Text()), len(fullResp.Text())) } ``` - - + + ```python import time @@ -693,15 +694,15 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin print(f"Mini: {mini_time:.2f}s, Full: {full_time:.2f}s") print(f"Mini length: {len(mini_response.text)}, Full length: {len(full_response.text)}") ``` - - + + ## Advanced Configuration ### Custom Model Configuration - - + + ```ts // Advanced configuration with passthrough options const response = await ai.generate({ @@ -736,8 +737,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin const config = environmentConfig[process.env.NODE_ENV || 'development']; ``` - - + + ```go // Advanced configuration resp, err := genkit.Generate(ctx, g, @@ -755,8 +756,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin }), ) ``` - - + + ```python # Advanced configuration response = await ai.generate( @@ -774,8 +775,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin } ) ``` - - + + ## Best Practices @@ -795,8 +796,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin ### Error Handling - - + + ```ts const robustGrokFlow = ai.defineFlow( { @@ -829,8 +830,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin }, ); ``` - - + + ```go func robustGrokGenerate(ctx context.Context, query string) (string, error) { resp, err := genkit.Generate(ctx, g, @@ -860,8 +861,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin return resp.Text(), nil } ``` - - + + ```python async def robust_grok_generate(query: str) -> str: try: @@ -884,8 +885,8 @@ The xAI plugin provides access to xAI's powerful Grok family of models, includin return fallback_response.text raise error ``` - - + + ## Next Steps diff --git a/src/content/docs/unified-docs/rag.mdx b/src/content/docs/unified-docs/rag.mdx index 49176150..e3dd4f11 100644 --- a/src/content/docs/unified-docs/rag.mdx +++ b/src/content/docs/unified-docs/rag.mdx @@ -3,7 +3,8 @@ title: Retrieval-augmented generation (RAG) description: Learn how Genkit simplifies retrieval-augmented generation (RAG) across JavaScript, Go, and Python by providing abstractions and plugins for indexers, embedders, and retrievers to incorporate external data into LLM responses. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; Genkit provides abstractions that help you build retrieval-augmented generation (RAG) flows, as well as plugins that provide integrations with @@ -44,21 +45,21 @@ RAG is a very broad area and there are many different techniques used to achieve the best quality RAG. The core Genkit framework offers main abstractions to help you do RAG: - - + + - **Indexers**: add documents to an "index" - **Embedders**: transforms documents into a vector representation - **Retrievers**: retrieve documents from an "index", given a query - - + + - **Embedders**: transforms documents into a vector representation - **Retrievers**: retrieve documents from an "index", given a query - - + + - **Embedders**: transforms documents into a vector representation - **Retrievers**: retrieve documents from an "index", given a query - - + + These definitions are broad on purpose because Genkit is un-opinionated about what an "index" is or how exactly documents are retrieved from it. 
Genkit only @@ -67,8 +68,8 @@ indexer implementation provider. ### Indexers - - + + The index is responsible for keeping track of your documents in such a way that you can quickly retrieve relevant documents given a specific query. This is most often accomplished using a vector database, which indexes your documents using @@ -99,21 +100,21 @@ indexer implementation provider. with a stable source of data. On the other hand, if you are working with data that frequently changes, you might continuously run the ingestion flow (for example, in a Cloud Firestore trigger, whenever a document is updated). - - + + In Go, indexing is typically handled by your chosen vector database or storage solution. Genkit provides the abstractions for working with indexed documents, but the actual indexing process is implementation-specific to your storage backend. Users are expected to add their own functionality to index documents using their preferred vector database or storage solution. - - + + In Python, indexing is outside the scope of Genkit and you should use the SDKs/APIs provided by the vector store you are using. Genkit provides the abstractions for working with indexed documents through retrievers. - - + + ### Embedders @@ -138,8 +139,8 @@ your own. Genkit provides indexer and retriever support through its plugin system. The following plugins are officially supported: - - + + **Vector Databases:** - [Astra DB](/docs/plugins/astra-db) - DataStax Astra DB vector database - [Chroma DB](/docs/plugins/chroma) vector database @@ -155,24 +156,24 @@ following plugins are officially supported: **Embedding Models:** - Google AI and Vertex AI plugins provide text embedding models - - + + **Vector Databases:** - [Pinecone](/go/docs/plugins/pinecone) cloud vector database - PostgreSQL with [`pgvector`](/go/docs/plugins/pgvector) **Embedding Models:** - [Google Generative AI](/go/docs/plugins/google-genai) - Text embedding models - - + + **Vector Databases:** - Firestore Vector Store (via Firebase plugin) - Dev Local Vector Store (for development/testing) **Embedding Models:** - Google GenAI plugin provides text embedding models - - + + ## Defining a RAG Flow @@ -182,8 +183,8 @@ determines what food items are available. ### Install dependencies - - + + Install dependencies for processing PDFs: ```bash @@ -191,29 +192,29 @@ determines what food items are available. npm install --save-dev @types/pdf-parse ``` - - + + Install dependencies for text processing and PDF parsing: ```bash go get github.com/tmc/langchaingo/textsplitter go get github.com/ledongthuc/pdf ``` - - + + Install dependencies for your chosen vector store and PDF processing: ```bash pip install genkit[google-genai,firebase] # Add other dependencies as needed for PDF processing ``` - - + + ### Configure vector store - - + + Add a local vector store to your configuration: ```ts @@ -236,8 +237,8 @@ determines what food items are available. ], }); ``` - - + + Configure your Genkit instance with embedding support: ```go @@ -252,8 +253,8 @@ determines what food items are available. log.Fatal(err) } ``` - - + + Configure your Genkit instance with vector store support: ```python @@ -276,13 +277,13 @@ determines what food items are available. ], ) ``` - - + + ### Define an Indexer - - + + The following example shows how to create an indexer to ingest a collection of PDF documents and store them in a local vector database. @@ -382,8 +383,8 @@ determines what food items are available. 
```bash genkit flow:run indexMenu '{"filePath": "menu.pdf"}' ``` - - + + #### Create chunking config This example uses the `textsplitter` library which provides a simple text @@ -476,8 +477,8 @@ determines what food items are available. ```bash genkit flow:run indexMenu '"menu.pdf"' ``` - - + + In Python, indexing is typically handled by your vector store's SDK. Here's an example of how you might structure an indexing flow: ```python @@ -500,8 +501,8 @@ determines what food items are available. ``` Note: Indexing is outside the scope of Genkit Python and should be done using your vector store's native SDK. - - + + After running the indexing flow, the vector database will be seeded with documents and ready to be used in Genkit flows with retrieval steps. @@ -510,8 +511,8 @@ documents and ready to be used in Genkit flows with retrieval steps. The following example shows how you might use a retriever in a RAG flow: - - + + ```ts import { devLocalRetrieverRef } from '@genkit-ai/dev-local-vectorstore'; import { googleAI } from '@genkit-ai/googleai'; @@ -558,8 +559,8 @@ The following example shows how you might use a retriever in a RAG flow: ```bash genkit flow:run menuQA '{"query": "Recommend a dessert from the menu while avoiding dairy and nuts"}' ``` - - + + ```go model := googlegenai.Model(g, "gemini-2.5-flash") @@ -597,8 +598,8 @@ The following example shows how you might use a retriever in a RAG flow: ```bash genkit flow:run menuQA '"Recommend a dessert from the menu while avoiding dairy and nuts"' ``` - - + + ```python @ai.flow() async def qa_flow(query: str): @@ -628,8 +629,8 @@ The following example shows how you might use a retriever in a RAG flow: result = await qa_flow('Recommend a dessert from the menu while avoiding dairy and nuts') print(result) ``` - - + + The output for this command should contain a response from the model, grounded in the indexed menu file. @@ -645,8 +646,8 @@ RAG techniques (such as reranking or prompt extensions) on top. ### Simple Retrievers - - + + Simple retrievers let you easily convert existing code into retrievers: ```ts @@ -672,14 +673,14 @@ RAG techniques (such as reranking or prompt extensions) on top. }, ); ``` - - + + ```go // Simple retriever example in Go // Implementation depends on your specific use case and data source ``` - - + + ```python from genkit.types import ( RetrieverRequest, @@ -703,13 +704,13 @@ RAG techniques (such as reranking or prompt extensions) on top. ai.define_retriever(name='my_retriever', fn=my_retriever) ``` - - + + ### Custom Retrievers - - + + ```ts import { CommonRetrieverOptionsSchema } from 'genkit/retriever'; import { z } from 'genkit'; @@ -750,8 +751,8 @@ RAG techniques (such as reranking or prompt extensions) on top. options: { preRerankK: 7, k: 3 }, }); ``` - - + + For example, suppose you have a custom re-ranking function you want to use. The following example defines a custom retriever that applies your function to the menu retriever defined earlier: @@ -793,8 +794,8 @@ RAG techniques (such as reranking or prompt extensions) on top. }, ) ``` - - + + ```python async def advanced_retriever(request: RetrieverRequest, ctx: ActionRunContext): """Example of an advanced retriever with custom logic.""" @@ -823,13 +824,13 @@ RAG techniques (such as reranking or prompt extensions) on top. retriever='advanced_retriever' ) ``` - - + + ## Rerankers and Two-Stage Retrieval - - + + A reranking model — also known as a cross-encoder — is a type of model that, given a query and document, will output a similarity score. 
We use this score to reorder the documents by relevance to our query. Reranker APIs take a list of @@ -920,16 +921,16 @@ RAG techniques (such as reranking or prompt extensions) on top. Once defined, this custom reranker can be used just like any other reranker in your RAG flows, giving you flexibility to implement advanced reranking strategies. - - + + Reranking functionality in Go can be implemented as part of custom retrievers. You can apply reranking logic within your custom retriever implementations. - - + + Reranking functionality in Python can be implemented as part of custom retrievers. You can apply reranking logic within your custom retriever implementations. - - + + ## Next steps diff --git a/src/content/docs/unified-docs/tool-calling.mdx b/src/content/docs/unified-docs/tool-calling.mdx index 1c12804d..22d3cf19 100644 --- a/src/content/docs/unified-docs/tool-calling.mdx +++ b/src/content/docs/unified-docs/tool-calling.mdx @@ -4,7 +4,8 @@ description: Learn how to enable LLMs to interact with external applications and --- import ExampleLink from '@/components/ExampleLink.astro'; -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; _Tool calling_, also known as _function calling_, is a structured way to give LLMs the ability to make requests back to the application that called it. You @@ -44,17 +45,17 @@ If you want to run the code examples on this page, first complete the steps in the Getting started guide for your language. All of the examples assume that you have already set up a project with Genkit dependencies installed. - - + + Complete the [Getting started](/docs/get-started) guide. - - + + Complete the [Get started](/go/docs/get-started-go) guide. - - + + Complete the [Get started](/python/docs/get-started) guide. - - + + This page discusses one of the advanced features of Genkit model abstraction, so before you dive too deeply, you should be familiar with the content on the @@ -113,8 +114,8 @@ supported. In addition: doesn't support it. - If the plugin exports model references, the model info will indicate if it supports tool calling. - - + + Check the `info.supports.tools` property on model references: ```ts @@ -123,31 +124,31 @@ supported. In addition: const model = googleAI.model('gemini-2.5-flash'); console.log(model.info.supports.tools); // true/false ``` - - + + Check the `ModelInfo.Supports.Tools` property: ```go // Model support information is available through the plugin // Check plugin documentation for specific model capabilities ``` - - + + Check the `info.supports.tools` property: ```python # Model support information is available through the plugin # Check plugin documentation for specific model capabilities ``` - - + + ### Defining tools Use the appropriate method for your language to define tools: - - + + Use the Genkit instance's `defineTool()` function: ```ts @@ -181,8 +182,8 @@ Use the appropriate method for your language to define tools: definition, take special care with the wording and descriptiveness of these parameters. They are vital for the LLM to make effective use of the available tools. - - + + Use the `genkit.DefineTool()` function: ```go @@ -228,8 +229,8 @@ Use the appropriate method for your language to define tools: The syntax here looks just like the `genkit.DefineFlow()` syntax; however, you must write a description. 
Take special care with the wording and descriptiveness of the description as it is vital for the LLM to decide to use it appropriately. - - + + Use the Genkit instance's `tool()` decorator: ```python @@ -256,15 +257,15 @@ Use the appropriate method for your language to define tools: parameter is required. When writing a tool definition, take special care with the wording and descriptiveness of these parameters. They are vital for the LLM to make effective use of the available tools. - - + + ### Using tools Include defined tools in your prompts to generate content: - - + + **Using `generate()`:** ```ts @@ -326,8 +327,8 @@ Include defined tools in your prompts to generate content: tools: [getWeather], }); ``` - - + + **Using `genkit.Generate()`:** ```go @@ -382,8 +383,8 @@ Include defined tools in your prompts to generate content: ai.WithInput(map[string]any{"location": "San Francisco"}), ) ``` - - + + **Using `generate()`:** ```python @@ -404,8 +405,8 @@ Include defined tools in your prompts to generate content: ) return result.text ``` - - + + Genkit will automatically handle the tool call if the LLM needs to use the tool to answer the prompt. @@ -413,8 +414,8 @@ Genkit will automatically handle the tool call if the LLM needs to use the tool When combining tool calling with streaming responses, you will receive `toolRequest` and `toolResponse` content parts in the chunks of the stream: - - + + ```ts const { stream } = ai.generateStream({ prompt: "What is the weather in Baltimore?", @@ -439,8 +440,8 @@ When combining tool calling with streaming responses, you will receive `toolRequ ``` You can use these chunks to dynamically construct the full generated message sequence. - - + + ```go resp, err := genkit.Generate(ctx, g, ai.WithPrompt("What is the weather in San Francisco?"), @@ -452,8 +453,8 @@ When combining tool calling with streaming responses, you will receive `toolRequ }), ) ``` - - + + ```python stream, response = ai.generate_stream( prompt='What is the weather in Baltimore?', @@ -463,8 +464,8 @@ When combining tool calling with streaming responses, you will receive `toolRequ async for chunk in stream: print(chunk) ``` - - + + ### Limiting Tool Call Iterations with `maxTurns` @@ -478,8 +479,8 @@ When working with tools that might trigger multiple sequential calls, you can co The default value is 5 turns, which works well for most scenarios. Each "turn" represents one complete cycle where the model can make tool calls and receive responses. - - + + **Example: Web Research Agent** Consider a research agent that might need to search multiple times to find comprehensive information: @@ -531,8 +532,8 @@ The default value is 5 turns, which works well for most scenarios. Each "turn" r maxTurns: 12, // Multiple stock lookups + calculations needed }); ``` - - + + ```go resp, err := genkit.Generate(ctx, g, ai.WithPrompt("Research the latest developments in quantum computing"), @@ -540,8 +541,8 @@ The default value is 5 turns, which works well for most scenarios. Each "turn" r ai.WithMaxTurns(8), // Allow up to 8 research iterations ) ``` - - + + ```python result = await ai.generate( prompt='Research the latest developments in quantum computing', @@ -549,8 +550,8 @@ The default value is 5 turns, which works well for most scenarios. 
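If you are unsure what limit a given workflow needs, one option is to run a representative prompt and count how many turns it actually consumed before tuning the value. A rough TypeScript sketch, assuming the `getWeather` tool defined earlier (the counting logic here is illustrative, not a built-in API):

```ts
const response = await ai.generate({
  prompt: 'What is the weather in Baltimore?',
  tools: [getWeather],
  maxTurns: 5,
});

// Each model message that contains a tool request corresponds to one turn of
// the tool-calling loop, so counting them shows how close this request came
// to the configured limit.
const toolTurns = response.messages.filter(
  (message) => message.role === 'model' && message.content.some((part) => part.toolRequest),
).length;

console.log(`Used ${toolTurns} of 5 allowed turns`);
```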
Each "turn" r max_turns=8, # Allow up to 8 research iterations ) ``` - - + + **What happens when maxTurns is reached?** @@ -558,8 +559,8 @@ When the limit is hit, Genkit stops the tool-calling loop and returns the model' ### Dynamically defining tools at runtime - - + + As most things in Genkit tools need to be predefined during your app's initialization. This is necessary so that you would be able interact with your tools from the Genkit Dev UI. This is typically the recommended way. However @@ -634,20 +635,20 @@ When the limit is hit, Genkit stops the tool-calling loop and returns the model' outputJsonSchema: myOutputJsonSchema, }); ``` - - + + ```go // Dynamic tool definition in Go // Check Go documentation for specific implementation details ``` - - + + ```python # Dynamic tool definition in Python # Check Python documentation for specific implementation details ``` - - + + ### Pause the tool loop by using interrupts @@ -668,8 +669,8 @@ interrupts guide to learn how to use them. If you want full control over this tool-calling loop, for example to apply more complicated logic, you can handle tool calls explicitly: - - + + Set the `returnToolRequests` parameter to `true`. Now it's your responsibility to ensure all of the tool requests are fulfilled: ```ts @@ -715,8 +716,8 @@ apply more complicated logic, you can handle tool calls explicitly: generateOptions.prompt = toolResponses; } ``` - - + + Set the `WithReturnToolRequests()` option to `true`. Now it's your responsibility to ensure all of the tool requests are fulfilled: ```go @@ -764,8 +765,8 @@ apply more complicated logic, you can handle tool calls explicitly: log.Fatal(err) } ``` - - + + Set the `return_tool_requests` parameter to `True`. Now it's your responsibility to ensure all of the tool requests are fulfilled: ```python @@ -783,8 +784,8 @@ apply more complicated logic, you can handle tool calls explicitly: for part in tool_request_parts: await handle_tool(part.name, part.input) ``` - - + + ## Extending Tool Capabilities with MCP diff --git a/src/content/docs/unified-docs/vector-databases/astra-db.mdx b/src/content/docs/unified-docs/vector-databases/astra-db.mdx index b2a3dfb2..86c8a365 100644 --- a/src/content/docs/unified-docs/vector-databases/astra-db.mdx +++ b/src/content/docs/unified-docs/vector-databases/astra-db.mdx @@ -3,14 +3,15 @@ title: Astra DB Vector Database description: Learn how to use DataStax Astra DB with Genkit across JavaScript, Go, and Python for serverless vector storage, semantic search, and RAG applications. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; DataStax Astra DB is a serverless vector database built on Apache Cassandra. It provides scalable vector storage with built-in embedding generation capabilities through Astra DB Vectorize, making it ideal for production AI applications that need reliable, distributed vector search. ## Installation and Setup - - + + Install the Astra DB plugin: ```bash @@ -77,8 +78,8 @@ DataStax Astra DB is a serverless vector database built on Apache Cassandra. It ], }); ``` - - + + For Go applications, you can use Astra DB through the DataStax Go driver: ```bash @@ -119,8 +120,8 @@ DataStax Astra DB is a serverless vector database built on Apache Cassandra. It } } ``` - - + + For Python applications, install the Astra DB client: ```bash @@ -150,15 +151,15 @@ DataStax Astra DB is a serverless vector database built on Apache Cassandra. 
It ], ) ``` - - + + ## Basic Usage ### Document Indexing - - + + Index documents for semantic search: ```ts @@ -208,8 +209,8 @@ DataStax Astra DB is a serverless vector database built on Apache Cassandra. It }); } ``` - - + + Index documents for semantic search: ```go @@ -273,8 +274,8 @@ DataStax Astra DB is a serverless vector database built on Apache Cassandra. It return nil } ``` - - + + Index documents for semantic search: ```python @@ -340,13 +341,13 @@ DataStax Astra DB is a serverless vector database built on Apache Cassandra. It return {"indexed": total_indexed, "success": total_indexed == len(docs)} ``` - - + + ### Document Retrieval - - + + Retrieve relevant documents using semantic search: ```ts @@ -393,8 +394,8 @@ DataStax Astra DB is a serverless vector database built on Apache Cassandra. It }, }); ``` - - + + Retrieve relevant documents using semantic search: ```go @@ -443,8 +444,8 @@ DataStax Astra DB is a serverless vector database built on Apache Cassandra. It return advancedRetrieve(ctx, query, 3, filter) } ``` - - + + Retrieve relevant documents using semantic search: ```python @@ -503,15 +504,15 @@ DataStax Astra DB is a serverless vector database built on Apache Cassandra. It filter_criteria=filter_criteria ) ``` - - + + ## Advanced Features ### Hybrid Search with Filtering - - + + Combine vector similarity with metadata filtering: ```ts @@ -561,8 +562,8 @@ DataStax Astra DB is a serverless vector database built on Apache Cassandra. It }, }); ``` - - + + Combine vector similarity with metadata filtering: ```go @@ -617,8 +618,8 @@ DataStax Astra DB is a serverless vector database built on Apache Cassandra. It return advancedRetrieve(ctx, query, 3, filter) } ``` - - + + Combine vector similarity with metadata filtering: ```python @@ -685,8 +686,8 @@ DataStax Astra DB is a serverless vector database built on Apache Cassandra. It "official_docs": official_docs } ``` - - + + ## Best Practices diff --git a/src/content/docs/unified-docs/vector-databases/chromadb.mdx b/src/content/docs/unified-docs/vector-databases/chromadb.mdx index fb906912..4028ffff 100644 --- a/src/content/docs/unified-docs/vector-databases/chromadb.mdx +++ b/src/content/docs/unified-docs/vector-databases/chromadb.mdx @@ -3,14 +3,15 @@ title: ChromaDB Vector Database description: Learn how to use ChromaDB with Genkit across JavaScript, Go, and Python for vector storage, semantic search, and RAG applications. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; ChromaDB is an open-source vector database designed for AI applications. It provides efficient vector storage, similarity search, and metadata filtering capabilities. ChromaDB can run in-memory, as a standalone server, or in client/server mode, making it flexible for both development and production use. ## Installation and Setup - - + + Install the ChromaDB plugin: ```bash @@ -58,8 +59,8 @@ ChromaDB is an open-source vector database designed for AI applications. It prov ], }); ``` - - + + For Go applications, you can use ChromaDB through the Go client: ```bash @@ -97,8 +98,8 @@ ChromaDB is an open-source vector database designed for AI applications. It prov } } ``` - - + + For Python applications, install the ChromaDB client: ```bash @@ -125,15 +126,15 @@ ChromaDB is an open-source vector database designed for AI applications. 
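Once the plugin is registered (as in the JavaScript setup above), you typically create named references for a collection and pass them to `ai.index()` and `ai.retrieve()`. A minimal TypeScript sketch (the `films` collection name is illustrative, and the `chromaIndexerRef`/`chromaRetrieverRef` helpers are assumed to match your installed plugin version):

```ts
import { chromaIndexerRef, chromaRetrieverRef } from 'genkitx-chromadb';
import { Document } from 'genkit/retriever';

// Named references for a single ChromaDB collection.
export const filmIndexer = chromaIndexerRef({ collectionName: 'films' });
export const filmRetriever = chromaRetrieverRef({ collectionName: 'films' });

export async function quickDemo() {
  // Index a document, then retrieve the closest matches for a query.
  await ai.index({
    indexer: filmIndexer,
    documents: [Document.fromText('The Matrix is a 1999 science fiction film.')],
  });

  return ai.retrieve({
    retriever: filmRetriever,
    query: 'sci-fi movies from the late 90s',
    options: { k: 3 },
  });
}
```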
It prov ], ) ``` - - + + ## Basic Usage ### Document Indexing - - + + Index documents for semantic search: ```ts @@ -176,8 +177,8 @@ ChromaDB is an open-source vector database designed for AI applications. It prov documents, }); ``` - - + + Index documents for semantic search: ```go @@ -219,8 +220,8 @@ ChromaDB is an open-source vector database designed for AI applications. It prov return nil } ``` - - + + Index documents for semantic search: ```python @@ -261,13 +262,13 @@ ChromaDB is an open-source vector database designed for AI applications. It prov print(f"Indexing failed: {error}") return {"indexed": 0, "success": False} ``` - - + + ### Document Retrieval - - + + Retrieve relevant documents using semantic search: ```ts @@ -298,8 +299,8 @@ ChromaDB is an open-source vector database designed for AI applications. It prov }, }); ``` - - + + Retrieve relevant documents using semantic search: ```go @@ -335,8 +336,8 @@ ChromaDB is an open-source vector database designed for AI applications. It prov return docs, nil } ``` - - + + Retrieve relevant documents using semantic search: ```python @@ -380,15 +381,15 @@ ChromaDB is an open-source vector database designed for AI applications. It prov print(f"Advanced retrieval failed: {error}") return [] ``` - - + + ## ChromaDB Server Setup ### Running ChromaDB Server - - + + Start a ChromaDB server for production use: ```bash @@ -416,8 +417,8 @@ ChromaDB is an open-source vector database designed for AI applications. It prov ], }); ``` - - + + Connect to a ChromaDB server: ```go @@ -435,8 +436,8 @@ ChromaDB is an open-source vector database designed for AI applications. It prov ), ) ``` - - + + Connect to a ChromaDB server: ```python @@ -454,15 +455,15 @@ ChromaDB is an open-source vector database designed for AI applications. It prov ], ) ``` - - + + ## Advanced Features ### Metadata Filtering - - + + Use metadata filtering for precise retrieval: ```ts @@ -493,8 +494,8 @@ ChromaDB is an open-source vector database designed for AI applications. It prov }, }); ``` - - + + Use metadata filtering for precise retrieval: ```go @@ -516,8 +517,8 @@ ChromaDB is an open-source vector database designed for AI applications. It prov return docs, nil } ``` - - + + Use metadata filtering for precise retrieval: ```python @@ -537,8 +538,8 @@ ChromaDB is an open-source vector database designed for AI applications. It prov print(f"Category filtering failed: {error}") return [] ``` - - + + ## Best Practices diff --git a/src/content/docs/unified-docs/vector-databases/cloud-firestore.mdx b/src/content/docs/unified-docs/vector-databases/cloud-firestore.mdx index d2169b3a..0ffb3537 100644 --- a/src/content/docs/unified-docs/vector-databases/cloud-firestore.mdx +++ b/src/content/docs/unified-docs/vector-databases/cloud-firestore.mdx @@ -3,7 +3,8 @@ title: Cloud Firestore Vector Search description: Learn how to use Google Cloud Firestore as a vector database for RAG applications across JavaScript, Go, and Python with Genkit. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; Cloud Firestore provides native vector search capabilities, making it an excellent choice for RAG (Retrieval-Augmented Generation) applications. Firestore's vector search feature allows you to store and query high-dimensional vector embeddings alongside your document data, providing fast and scalable similarity search. 
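At a glance, a typical integration defines a retriever over a collection that stores text alongside an embedding field, then queries it with `ai.retrieve()`. The following compact TypeScript sketch is orientation only; the collection and field names are illustrative, and the sections below walk through each piece in detail:

```ts
import { getFirestore } from 'firebase-admin/firestore';
import { defineFirestoreRetriever } from '@genkit-ai/firebase'; // import path may vary by plugin version
import { googleAI } from '@genkit-ai/googleai';

// Assumes initializeApp() has already run (see Installation and Setup).
const firestore = getFirestore();

const previewRetriever = defineFirestoreRetriever(ai, {
  name: 'previewRetriever',
  firestore,
  collection: 'documents',   // collection holding your indexed content
  contentField: 'text',      // field containing the document text
  vectorField: 'embedding',  // field containing the vector embedding
  embedder: googleAI.embedder('text-embedding-004'),
  distanceMeasure: 'COSINE',
});

export async function preview(question: string) {
  // Returns the five nearest documents by cosine distance.
  return ai.retrieve({
    retriever: previewRetriever,
    query: question,
    options: { limit: 5 },
  });
}
```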
@@ -17,8 +18,8 @@ Cloud Firestore provides native vector search capabilities, making it an excelle ## Installation and Setup - - + + Install the Firebase plugin: ```bash @@ -37,8 +38,8 @@ Cloud Firestore provides native vector search capabilities, making it an excelle const firestore = getFirestore(app); ``` - - + + Install the Firebase plugin: ```bash @@ -58,8 +59,8 @@ Cloud Firestore provides native vector search capabilities, making it an excelle ProjectId: "your-firebase-project-id", } ``` - - + + Install the Firebase plugin: ```bash @@ -75,8 +76,8 @@ Cloud Firestore provides native vector search capabilities, making it an excelle # Initialize Firestore client firestore_client = firestore.Client(project="your-firebase-project-id") ``` - - + + ## Prerequisites @@ -91,8 +92,8 @@ Cloud Firestore provides native vector search capabilities, making it an excelle ### Authentication - - + + Set up authentication using one of these methods: **Option 1: Service Account Key** @@ -125,8 +126,8 @@ Cloud Firestore provides native vector search capabilities, making it an excelle firestore.settings(authOptions); } ``` - - + + **Local Development:** ```bash firebase login @@ -150,8 +151,8 @@ Cloud Firestore provides native vector search capabilities, making it an excelle App: app, } ``` - - + + **Local Development:** ```bash gcloud auth application-default login @@ -176,15 +177,15 @@ Cloud Firestore provides native vector search capabilities, making it an excelle credentials=credentials ) ``` - - + + ## Basic Usage ### Defining a Firestore Retriever - - + + ```ts import { genkit } from 'genkit'; import { googleAI } from '@genkit-ai/googleai'; @@ -209,8 +210,8 @@ Cloud Firestore provides native vector search capabilities, making it an excelle distanceMeasure: 'COSINE', // Options: 'COSINE', 'EUCLIDEAN', 'DOT_PRODUCT' }); ``` - - + + ```go import ( "github.com/firebase/genkit/go/genkit" @@ -248,8 +249,8 @@ Cloud Firestore provides native vector search capabilities, making it an excelle } } ``` - - + + ```python from genkit.ai import Genkit from genkit.plugins.firebase.firestore import FirestoreVectorStore @@ -273,13 +274,13 @@ Cloud Firestore provides native vector search capabilities, making it an excelle ] ) ``` - - + + ### Retrieving Documents - - + + ```ts // Basic retrieval const docs = await ai.retrieve({ @@ -306,8 +307,8 @@ Cloud Firestore provides native vector search capabilities, making it an excelle console.log('Retrieved documents:', docs); ``` - - + + ```go // Basic retrieval results, err := ai.Retrieve(ctx, retriever, ai.WithDocs("What is machine learning?")) @@ -333,8 +334,8 @@ Cloud Firestore provides native vector search capabilities, making it an excelle fmt.Printf("Answer: %s\n", resp.Text()) ``` - - + + ```python from genkit.ai import Document @@ -376,8 +377,8 @@ Cloud Firestore provides native vector search capabilities, making it an excelle # answer = asyncio.run(rag_query("What is machine learning?")) # print(answer) ``` - - + + ## Data Indexing @@ -400,8 +401,8 @@ Your Firestore documents should follow this structure: ### Populating the Index - - + + ```ts import { chunk } from 'llm-chunk'; import { FieldValue } from 'firebase-admin/firestore'; @@ -465,8 +466,8 @@ Your Firestore documents should follow this structure: await batch.commit(); } ``` - - + + ```go import ( "context" @@ -560,8 +561,8 @@ Your Firestore documents should follow this structure: return err } ``` - - + + ```python from genkit.ai import Document from genkit.types import TextPart @@ -637,15 +638,15 @@ Your 
Firestore documents should follow this structure: # ] # asyncio.run(index_documents(ai, documents, 'documents')) ``` - - + + ## Creating Vector Indexes Firestore requires vector indexes for efficient similarity search. Create the index using the `gcloud` CLI: - - + + ```bash # For text-embedding-004 (768 dimensions) gcloud alpha firestore indexes composite create \ @@ -672,8 +673,8 @@ Firestore requires vector indexes for efficient similarity search. Create the in console.error('Index required:', error.message); } ``` - - + + ```bash # Create vector index for your collection gcloud alpha firestore indexes composite create \ @@ -688,8 +689,8 @@ Firestore requires vector indexes for efficient similarity search. Create the in ```bash gcloud firestore indexes composite list --project=your-firebase-project-id ``` - - + + ```bash # Create the vector index gcloud firestore indexes composite create \ @@ -717,15 +718,15 @@ Firestore requires vector indexes for efficient similarity search. Create the in # Error message will contain the exact gcloud command needed print(f"Index required: {error}") ``` - - + + ## Advanced Configuration ### Retrieval Options - - + + ```ts const retriever = defineFirestoreRetriever(ai, { name: 'advancedRetriever', @@ -752,8 +753,8 @@ Firestore requires vector indexes for efficient similarity search. Create the in }, }); ``` - - + + ```go // Advanced retriever configuration retriever, err := firebase.DefineRetriever(ctx, g, firebase.RetrieverOptions{ @@ -776,8 +777,8 @@ Firestore requires vector indexes for efficient similarity search. Create the in fmt.Printf("Document: %s\n", doc.Content[0].Text) } ``` - - + + ```python # Advanced configuration ai = Genkit( @@ -812,13 +813,13 @@ Firestore requires vector indexes for efficient similarity search. Create the in return results ``` - - + + ### Multiple Collections - - + + ```ts // Define multiple retrievers for different collections const techRetriever = defineFirestoreRetriever(ai, { @@ -850,8 +851,8 @@ Firestore requires vector indexes for efficient similarity search. Create the in }); } ``` - - + + ```go // Define multiple retrievers techRetriever, err := firebase.DefineRetriever(ctx, g, firebase.RetrieverOptions{ @@ -884,8 +885,8 @@ Firestore requires vector indexes for efficient similarity search. Create the in return ai.Retrieve(ctx, retriever, ai.WithDocs(query)) } ``` - - + + ```python # Define multiple retrievers ai = Genkit( @@ -920,8 +921,8 @@ Firestore requires vector indexes for efficient similarity search. Create the in retriever=retriever_name, ) ``` - - + + ## Best Practices diff --git a/src/content/docs/unified-docs/vector-databases/cloud-sql-postgresql.mdx b/src/content/docs/unified-docs/vector-databases/cloud-sql-postgresql.mdx index a0e62f57..c97eb0ee 100644 --- a/src/content/docs/unified-docs/vector-databases/cloud-sql-postgresql.mdx +++ b/src/content/docs/unified-docs/vector-databases/cloud-sql-postgresql.mdx @@ -3,14 +3,15 @@ title: Cloud SQL for PostgreSQL Vector Database description: Learn how to use Google Cloud SQL for PostgreSQL with pgvector extension and Genkit across JavaScript, Go, and Python for managed vector storage and semantic search. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully managed PostgreSQL database with vector search capabilities. 
It combines the reliability and scalability of Google Cloud with the power of PostgreSQL and pgvector, making it ideal for production AI applications that need managed vector storage with enterprise-grade features. ## Installation and Setup - - + + Install the Cloud SQL PostgreSQL plugin: ```bash @@ -94,8 +95,8 @@ Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully man gcloud sql connect my-instance --user=postgres --database=my-database # Then run: CREATE EXTENSION IF NOT EXISTS vector; ``` - - + + For Go applications, you can use Cloud SQL through the Google Cloud SQL Go connector: ```bash @@ -167,8 +168,8 @@ Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully man } } ``` - - + + For Python applications, install the Cloud SQL connector: ```bash @@ -223,15 +224,15 @@ Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully man ], ) ``` - - + + ## Basic Usage ### Document Indexing - - + + Index documents with custom metadata handling: ```ts @@ -289,8 +290,8 @@ Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully man }); } ``` - - + + Index documents with custom metadata handling: ```go @@ -362,8 +363,8 @@ Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully man return nil } ``` - - + + Index documents with custom metadata handling: ```python @@ -433,13 +434,13 @@ Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully man return {"indexed": total_indexed, "success": total_indexed == len(docs)} ``` - - + + ### Document Retrieval - - + + Retrieve documents with advanced filtering and distance strategies: ```ts @@ -496,8 +497,8 @@ Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully man distanceStrategy: DistanceStrategy.DOT_PRODUCT, }); ``` - - + + Retrieve documents with advanced filtering and distance strategies: ```go @@ -570,8 +571,8 @@ Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully man return nil } ``` - - + + Retrieve documents with advanced filtering and distance strategies: ```python @@ -643,15 +644,15 @@ Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully man "total_results": len(basic_docs) + len(db_docs) + len(multi_docs) } ``` - - + + ## Advanced Features ### Custom Table Configuration - - + + Configure custom table schemas for specific use cases: ```ts @@ -701,8 +702,8 @@ Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully man }, }); ``` - - + + Configure custom table schemas for specific use cases: ```go @@ -768,8 +769,8 @@ Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully man return docs, nil } ``` - - + + Configure custom table schemas for specific use cases: ```python @@ -865,8 +866,8 @@ Google Cloud SQL for PostgreSQL with the pgvector extension provides a fully man filter_clause=filter_clause ) ``` - - + + ## Best Practices diff --git a/src/content/docs/unified-docs/vector-databases/dev-local-vectorstore.mdx b/src/content/docs/unified-docs/vector-databases/dev-local-vectorstore.mdx index 418d90eb..3c78202d 100644 --- a/src/content/docs/unified-docs/vector-databases/dev-local-vectorstore.mdx +++ b/src/content/docs/unified-docs/vector-databases/dev-local-vectorstore.mdx @@ -3,7 +3,8 @@ title: Dev Local Vector Store description: Learn how to use the Dev Local Vector Store for local development and testing across JavaScript, Go, and Python with Genkit. 
--- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; The Dev Local Vector Store provides a simple, file-based vector database solution for local development and testing. It's designed to be lightweight and easy to set up, making it perfect for prototyping, testing, and development environments where you don't need the complexity of a full production vector database. @@ -21,8 +22,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro ## Installation and Setup - - + + JavaScript doesn't have a dedicated dev local vector store plugin, but you can create a simple in-memory or file-based solution for development: ```bash @@ -108,8 +109,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro googleAI.embedder('text-embedding-004') ); ``` - - + + Go doesn't have a dedicated dev local vector store plugin, but you can create a simple file-based solution: ```bash @@ -240,8 +241,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro return dotProduct / (magnitudeA * magnitudeB) } ``` - - + + Install the Dev Local Vector Store plugin: ```bash @@ -265,15 +266,15 @@ The Dev Local Vector Store is intended for development and testing only. For pro ], ) ``` - - + + ## Basic Usage ### Indexing Documents - - + + ```ts // Index documents in the local store const documents = [ @@ -286,8 +287,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro await localStore.index(documents); console.log('Documents indexed successfully'); ``` - - + + ```go // Initialize the local store embedder := googleAIPlugin.Embedder(g, "text-embedding-004") @@ -307,8 +308,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro } fmt.Println("Documents indexed successfully") ``` - - + + ```python from genkit.types import Document @@ -327,13 +328,13 @@ The Dev Local Vector Store is intended for development and testing only. For pro await DevLocalVectorStore.index('my_vectorstore', genkit_docs) print("Documents indexed successfully") ``` - - + + ### Retrieving Documents - - + + ```ts // Search for similar documents const query = 'What is artificial intelligence?'; @@ -360,8 +361,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro const answer = await ragQuery('What is machine learning?'); console.log('Answer:', answer); ``` - - + + ```go // Search for similar documents query := "What is artificial intelligence?" @@ -405,8 +406,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro } fmt.Printf("Answer: %s\n", answer) ``` - - + + ```python from genkit.types import Document @@ -451,15 +452,15 @@ The Dev Local Vector Store is intended for development and testing only. For pro answer = await rag_query("What is machine learning?") print(f"Answer: {answer}") ``` - - + + ## Advanced Configuration ### Custom Storage Location - - + + ```ts // Custom storage path const customStore = new DevLocalVectorStore( @@ -478,8 +479,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro googleAI.embedder('text-embedding-004') ); ``` - - + + ```go // Custom storage paths techStore := NewDevLocalVectorStore("./data/tech-docs.json", embedder) @@ -508,8 +509,8 @@ The Dev Local Vector Store is intended for development and testing only. 
For pro log.Fatal(err) } ``` - - + + ```python # Multiple vector stores for different domains ai = Genkit( @@ -542,13 +543,13 @@ The Dev Local Vector Store is intended for development and testing only. For pro await DevLocalVectorStore.index('tech_docs', tech_docs) await DevLocalVectorStore.index('general_docs', general_docs) ``` - - + + ### Batch Operations - - + + ```ts // Batch indexing for better performance async function batchIndex(documents: string[], batchSize: number = 10) { @@ -566,8 +567,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro await batchIndex(largeDocumentSet, 20); ``` - - + + ```go // Batch indexing function func batchIndex(ctx context.Context, store *DevLocalVectorStore, documents []string, batchSize int) error { @@ -598,8 +599,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro log.Fatal(err) } ``` - - + + ```python # Batch indexing for large document sets async def batch_index(store_name: str, documents: list[str], batch_size: int = 20): @@ -617,8 +618,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro await batch_index('my_vectorstore', large_document_set, 20) ``` - - + + ## Best Practices @@ -638,8 +639,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro ### Migration to Production - - + + ```ts // Environment-based vector store selection const isProduction = process.env.NODE_ENV === 'production'; @@ -658,8 +659,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro await vectorStore.index(documents); const results = await vectorStore.search(query, 5); ``` - - + + ```go // Environment-based store selection var store VectorStore @@ -674,8 +675,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro err := store.Index(ctx, documents) results, err := store.Search(ctx, query, 5) ``` - - + + ```python import os @@ -709,8 +710,8 @@ The Dev Local Vector Store is intended for development and testing only. For pro query_doc = Document.from_text("search query") results = await ai.retrieve(query=query_doc, retriever=store_name) ``` - - + + ## Limitations diff --git a/src/content/docs/unified-docs/vector-databases/lancedb.mdx b/src/content/docs/unified-docs/vector-databases/lancedb.mdx index 6bf4898b..4ac30774 100644 --- a/src/content/docs/unified-docs/vector-databases/lancedb.mdx +++ b/src/content/docs/unified-docs/vector-databases/lancedb.mdx @@ -3,14 +3,15 @@ title: LanceDB Vector Database description: Learn how to use LanceDB with Genkit across JavaScript, Go, and Python for embedded vector storage, semantic search, and RAG applications. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; LanceDB is an open-source vector database designed for AI applications. It provides embedded vector storage with high performance, making it ideal for applications that need fast vector similarity search without the complexity of managing a separate database server. ## Installation and Setup - - + + Install the LanceDB plugin: ```bash @@ -56,8 +57,8 @@ LanceDB is an open-source vector database designed for AI applications. It provi ], }); ``` - - + + For Go applications, you can use LanceDB through the Go client: ```bash @@ -95,8 +96,8 @@ LanceDB is an open-source vector database designed for AI applications. 
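Usage then follows the same pattern as Genkit's other vector stores: create indexer and retriever references for a table, index documents, and retrieve by query. In this sketch the `lancedbIndexerRef` and `lancedbRetrieverRef` helpers and their options are hypothetical stand-ins; check your installed plugin's exports for the actual names:

```ts
// NOTE: lancedbIndexerRef / lancedbRetrieverRef are hypothetical names used
// for illustration; substitute the references exported by your plugin.
import { lancedbIndexerRef, lancedbRetrieverRef } from 'genkitx-lancedb';
import { Document } from 'genkit/retriever';

export const notesIndexer = lancedbIndexerRef({ tableName: 'notes' });
export const notesRetriever = lancedbRetrieverRef({ tableName: 'notes' });

export async function quickDemo() {
  // Index a document, then retrieve the closest matches for a query.
  await ai.index({
    indexer: notesIndexer,
    documents: [Document.fromText('LanceDB stores vectors in an embedded table.')],
  });

  return ai.retrieve({
    retriever: notesRetriever,
    query: 'embedded vector storage',
    options: { k: 3 },
  });
}
```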
It provi } } ``` - - + + For Python applications, install the LanceDB client: ```bash @@ -123,15 +124,15 @@ LanceDB is an open-source vector database designed for AI applications. It provi ], ) ``` - - + + ## Basic Usage ### Document Indexing - - + + Index documents for semantic search: ```ts @@ -185,8 +186,8 @@ LanceDB is an open-source vector database designed for AI applications. It provi }); } ``` - - + + Index documents for semantic search: ```go @@ -259,8 +260,8 @@ LanceDB is an open-source vector database designed for AI applications. It provi return nil } ``` - - + + Index documents for semantic search: ```python @@ -329,13 +330,13 @@ LanceDB is an open-source vector database designed for AI applications. It provi return {"indexed": total_indexed, "success": total_indexed == len(docs)} ``` - - + + ### Document Retrieval - - + + Retrieve relevant documents using semantic search: ```ts @@ -371,8 +372,8 @@ LanceDB is an open-source vector database designed for AI applications. It provi }, }); ``` - - + + Retrieve relevant documents using semantic search: ```go @@ -433,8 +434,8 @@ LanceDB is an open-source vector database designed for AI applications. It provi return nil } ``` - - + + Retrieve relevant documents using semantic search: ```python @@ -494,15 +495,15 @@ LanceDB is an open-source vector database designed for AI applications. It provi print(f"Found {len(docs)} documents, {len(filtered_docs)} filtered") return docs, filtered_docs ``` - - + + ## Advanced Features ### Complete RAG Implementation - - + + Build a complete RAG system with document processing: ```ts @@ -627,8 +628,8 @@ LanceDB is an open-source vector database designed for AI applications. It provi }, ); ``` - - + + Build a complete RAG system with document processing: ```go @@ -740,8 +741,8 @@ LanceDB is an open-source vector database designed for AI applications. It provi return resp.Text(), sources, nil } ``` - - + + Build a complete RAG system with document processing: ```python @@ -879,8 +880,8 @@ LanceDB is an open-source vector database designed for AI applications. It provi else: return {"error": "Failed to index documents"} ``` - - + + ## Best Practices diff --git a/src/content/docs/unified-docs/vector-databases/neo4j.mdx b/src/content/docs/unified-docs/vector-databases/neo4j.mdx index c258dbea..39dca335 100644 --- a/src/content/docs/unified-docs/vector-databases/neo4j.mdx +++ b/src/content/docs/unified-docs/vector-databases/neo4j.mdx @@ -3,14 +3,15 @@ title: Neo4j Graph Vector Database description: Learn how to use Neo4j with Genkit across JavaScript, Go, and Python for graph-based vector storage, semantic search, and knowledge graph applications. --- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; Neo4j is a graph database that combines the power of graph relationships with vector search capabilities. It enables you to store documents as nodes with vector embeddings while maintaining rich relationships between entities, making it ideal for knowledge graphs, recommendation systems, and complex AI applications that need both semantic search and graph traversal. 
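The combination described above, vector similarity plus relationship traversal, can be expressed in a single Cypher query. The following TypeScript sketch uses the official `neo4j-driver` and assumes a vector index named `document_embeddings` and a `:MENTIONS` relationship; both are illustrative names you would replace with your own schema:

```ts
import neo4j from 'neo4j-driver';
import { googleAI } from '@genkit-ai/googleai';

const driver = neo4j.driver('neo4j://localhost:7687', neo4j.auth.basic('neo4j', 'password'));

export async function graphEnhancedSearch(query: string) {
  // ai.embed returns an array of embeddings; take the first vector.
  const embedding = (
    await ai.embed({ embedder: googleAI.embedder('text-embedding-004'), content: query })
  )[0].embedding;

  const session = driver.session();
  try {
    // Find the top vector matches, then hop one relationship outward to
    // collect the entities each matching document mentions.
    const result = await session.run(
      `CALL db.index.vector.queryNodes('document_embeddings', 5, $embedding)
       YIELD node, score
       OPTIONAL MATCH (node)-[:MENTIONS]->(entity:Entity)
       RETURN node.text AS text, score, collect(entity.name) AS entities`,
      { embedding },
    );
    return result.records.map((record) => ({
      text: record.get('text'),
      score: record.get('score'),
      entities: record.get('entities'),
    }));
  } finally {
    await session.close();
  }
}
```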
## Installation and Setup - - + + Install the Neo4j plugin: ```bash @@ -70,8 +71,8 @@ Neo4j is a graph database that combines the power of graph relationships with ve `vector.similarity_function`: 'cosine' }} ``` - - + + For Go applications, you can use Neo4j through the official Go driver: ```bash @@ -113,8 +114,8 @@ Neo4j is a graph database that combines the power of graph relationships with ve } } ``` - - + + For Python applications, install the Neo4j driver: ```bash @@ -145,15 +146,15 @@ Neo4j is a graph database that combines the power of graph relationships with ve ], ) ``` - - + + ## Basic Usage ### Document Indexing - - + + Index documents as graph nodes with vector embeddings: ```ts @@ -202,8 +203,8 @@ Neo4j is a graph database that combines the power of graph relationships with ve console.log('Documents indexed as graph nodes with embeddings'); }; ``` - - + + Index documents as graph nodes with vector embeddings: ```go @@ -257,8 +258,8 @@ Neo4j is a graph database that combines the power of graph relationships with ve return nil } ``` - - + + Index documents as graph nodes with vector embeddings: ```python @@ -310,13 +311,13 @@ Neo4j is a graph database that combines the power of graph relationships with ve print("Documents indexed as graph nodes with embeddings") return {"relationships_created": True} ``` - - + + ### Document Retrieval - - + + Retrieve relevant documents using vector similarity: ```ts @@ -356,8 +357,8 @@ Neo4j is a graph database that combines the power of graph relationships with ve displayName: 'Author-Specific Search', }); ``` - - + + Retrieve relevant documents using vector similarity: ```go @@ -412,8 +413,8 @@ Neo4j is a graph database that combines the power of graph relationships with ve return nil } ``` - - + + Retrieve relevant documents using vector similarity: ```python @@ -473,15 +474,15 @@ Neo4j is a graph database that combines the power of graph relationships with ve print(f"Found {len(docs)} documents, {len(advanced_docs)} advanced results") return docs, advanced_docs ``` - - + + ## Advanced Features ### Graph-Enhanced Retrieval - - + + Combine vector search with graph traversal for enhanced results: ```ts @@ -530,8 +531,8 @@ Neo4j is a graph database that combines the power of graph relationships with ve return JSON.parse(entities); }; ``` - - + + Combine vector search with graph traversal for enhanced results: ```go @@ -605,8 +606,8 @@ Neo4j is a graph database that combines the power of graph relationships with ve return result, nil } ``` - - + + Combine vector search with graph traversal for enhanced results: ```python @@ -685,8 +686,8 @@ Neo4j is a graph database that combines the power of graph relationships with ve "enhanced_results": enhanced_results } ``` - - + + ## Best Practices diff --git a/src/content/docs/unified-docs/vector-databases/pgvector.mdx b/src/content/docs/unified-docs/vector-databases/pgvector.mdx index 7b09d174..4165458a 100644 --- a/src/content/docs/unified-docs/vector-databases/pgvector.mdx +++ b/src/content/docs/unified-docs/vector-databases/pgvector.mdx @@ -3,7 +3,8 @@ title: pgvector (PostgreSQL Vector Extension) description: Learn how to use pgvector with Genkit across JavaScript, Go, and Python for vector storage, semantic search, and RAG applications using PostgreSQL. 
--- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; pgvector is a PostgreSQL extension that adds vector similarity search capabilities to PostgreSQL databases. It provides efficient storage and querying of high-dimensional vectors, making it ideal for AI applications that need both relational and vector data in a single database. @@ -11,8 +12,8 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti ### PostgreSQL with pgvector Extension - - + + Install the required dependencies: ```bash @@ -59,8 +60,8 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti ssl: false, // Enable for production }); ``` - - + + Install the required dependencies: ```bash @@ -120,8 +121,8 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti defer db.Close() } ``` - - + + Install the required dependencies: ```bash @@ -171,15 +172,15 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti # Register pgvector types register_vector(conn) ``` - - + + ## Basic Usage ### Document Indexing - - + + Create a custom indexer for pgvector: ```ts @@ -240,8 +241,8 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti documents, }); ``` - - + + Create a custom indexer for pgvector: ```go @@ -317,8 +318,8 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti ) } ``` - - + + Create a custom indexer for pgvector: ```python @@ -389,13 +390,13 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti result = await indexer(documents) return result ``` - - + + ### Document Retrieval - - + + Create a custom retriever for pgvector: ```ts @@ -467,8 +468,8 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti }, }); ``` - - + + Create a custom retriever for pgvector: ```go @@ -552,8 +553,8 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti ) } ``` - - + + Create a custom retriever for pgvector: ```python @@ -640,15 +641,15 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti return docs ``` - - + + ## Advanced Features ### Hybrid Search (Vector + Text) - - + + Combine vector similarity with traditional text search: ```ts @@ -710,8 +711,8 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti }, ); ``` - - + + Combine vector similarity with traditional text search: ```go @@ -813,8 +814,8 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti ) } ``` - - + + Combine vector similarity with traditional text search: ```python @@ -892,8 +893,8 @@ pgvector is a PostgreSQL extension that adds vector similarity search capabiliti return hybrid_search ``` - - + + ## Best Practices diff --git a/src/content/docs/unified-docs/vector-databases/pinecone.mdx b/src/content/docs/unified-docs/vector-databases/pinecone.mdx index 49b1364d..21a18cc7 100644 --- a/src/content/docs/unified-docs/vector-databases/pinecone.mdx +++ b/src/content/docs/unified-docs/vector-databases/pinecone.mdx @@ -3,14 +3,15 @@ title: Pinecone Vector Database description: Learn how to use Pinecone cloud vector database with Genkit across JavaScript, Go, and Python for RAG applications, semantic search, and vector storage. 
--- -import { Tabs, TabItem } from '@astrojs/starlight/components'; +import LangTabs from '@/components/LangTabs.astro'; +import LangTabItem from '@/components/LangTabItem.astro'; Pinecone is a fully managed cloud vector database that provides high-performance vector search capabilities. The Pinecone integration with Genkit enables you to build powerful RAG (Retrieval-Augmented Generation) applications with semantic search, document indexing, and intelligent retrieval. ## Installation and Setup - - + + Install the Pinecone plugin: ```bash @@ -59,8 +60,8 @@ Pinecone is a fully managed cloud vector database that provides high-performance ``` Get your API key from [Pinecone Console](https://app.pinecone.io/). - - + + For Go applications, use the Pinecone Go client: ```bash @@ -104,8 +105,8 @@ Pinecone is a fully managed cloud vector database that provides high-performance ```bash export PINECONE_API_KEY=your_pinecone_api_key ``` - - + + For Python applications, install the Pinecone client: ```bash @@ -138,15 +139,15 @@ Pinecone is a fully managed cloud vector database that provides high-performance ```bash export PINECONE_API_KEY=your_pinecone_api_key ``` - - + + ## Basic Usage ### Document Indexing - - + + Index documents for semantic search: ```ts @@ -189,8 +190,8 @@ Pinecone is a fully managed cloud vector database that provides high-performance documents, }); ``` - - + + Index documents for semantic search: ```go @@ -232,8 +233,8 @@ Pinecone is a fully managed cloud vector database that provides high-performance return nil } ``` - - + + Index documents for semantic search: ```python @@ -274,13 +275,13 @@ Pinecone is a fully managed cloud vector database that provides high-performance print(f"Indexing failed: {error}") return {"indexed": 0, "success": False} ``` - - + + ### Document Retrieval - - + + Retrieve relevant documents using semantic search: ```ts @@ -311,8 +312,8 @@ Pinecone is a fully managed cloud vector database that provides high-performance }, }); ``` - - + + Retrieve relevant documents using semantic search: ```go @@ -348,8 +349,8 @@ Pinecone is a fully managed cloud vector database that provides high-performance return docs, nil } ``` - - + + Retrieve relevant documents using semantic search: ```python @@ -393,13 +394,13 @@ Pinecone is a fully managed cloud vector database that provides high-performance print(f"Advanced retrieval failed: {error}") return [] ``` - - + + ## RAG Implementation - - + + Build a complete RAG system with Pinecone: ```ts @@ -464,8 +465,8 @@ Answer:`, }, ); ``` - - + + Build a complete RAG system with Pinecone: ```go @@ -518,8 +519,8 @@ Answer:`, question, context) return resp.Text(), docs, nil } ``` - - + + Build a complete RAG system with Pinecone: ```python @@ -578,8 +579,8 @@ Answer:''' print(f"RAG query failed: {error}") return {"answer": "I'm sorry, I couldn't find relevant information.", "sources": []} ``` - - + + ## Best Practices diff --git a/src/scripts/language-preference.js b/src/scripts/language-preference.js deleted file mode 100644 index d432e2be..00000000 --- a/src/scripts/language-preference.js +++ /dev/null @@ -1,157 +0,0 @@ -/** - * Language Preference Enhancement for Astro Starlight - * - * This script enhances Starlight's built-in tab synchronization with: - * 1. Persistent storage of language preference in localStorage - * 2. Cross-page restoration of language preference - * 3. 
Automatic detection and synchronization of language tabs - */ - -class LanguagePreferenceEnhancer { - constructor() { - this.storageKey = 'genkit-preferred-language'; - this.languages = ['JavaScript', 'Go', 'Python']; - this.defaultLanguage = 'JavaScript'; - - // Wait for DOM to be ready - if (document.readyState === 'loading') { - document.addEventListener('DOMContentLoaded', () => this.init()); - } else { - this.init(); - } - } - - init() { - // Get stored preference or use default - const storedLanguage = localStorage.getItem(this.storageKey) || this.defaultLanguage; - - // Set up event listeners for tab clicks - this.setupTabListeners(); - - // Apply stored preference to all language tabs - this.restoreLanguagePreference(storedLanguage); - - // Watch for dynamically added content (e.g., navigation) - this.observeContentChanges(); - } - - setupTabListeners() { - // Listen for clicks on all tab buttons - document.addEventListener('click', (event) => { - const tabButton = event.target.closest('[role="tab"]'); - if (!tabButton) return; - - // Check if this is a language tab by looking at the text content - const tabText = tabButton.textContent.trim(); - if (this.languages.includes(tabText)) { - // Store the preference when a language tab is clicked - this.storeLanguagePreference(tabText); - } - }); - } - - storeLanguagePreference(language) { - if (!this.languages.includes(language)) { - console.warn(`Unknown language: ${language}`); - return; - } - - // Store preference - localStorage.setItem(this.storageKey, language); - } - - restoreLanguagePreference(language) { - if (!this.languages.includes(language)) { - console.warn(`Unknown language: ${language}, using default`); - language = this.defaultLanguage; - } - - // Find all language tabs and activate the preferred one - this.activateLanguageTabs(language); - } - - activateLanguageTabs(language) { - // Find all tab groups with syncKey="language" - const languageTabGroups = document.querySelectorAll('[role="tablist"]'); - - languageTabGroups.forEach(tabList => { - // Check if this tablist contains language tabs - const tabs = tabList.querySelectorAll('[role="tab"]'); - let hasLanguageTabs = false; - let targetTab = null; - - // Look for language tabs within this tablist - tabs.forEach(tab => { - const tabText = tab.textContent.trim(); - if (this.languages.includes(tabText)) { - hasLanguageTabs = true; - if (tabText === language) { - targetTab = tab; - } - } - }); - - // If this tablist has language tabs and we found our target, activate it - if (hasLanguageTabs && targetTab && !this.isTabActive(targetTab)) { - this.activateTab(targetTab); - } - }); - } - - isTabActive(tab) { - return tab.getAttribute('aria-selected') === 'true'; - } - - activateTab(tab) { - // Trigger a click event to let Starlight handle the tab activation - // This ensures we work with Starlight's existing tab system - tab.click(); - } - - observeContentChanges() { - // Watch for new content being added (e.g., navigation between pages) - const observer = new MutationObserver((mutations) => { - let hasNewTabs = false; - - mutations.forEach(mutation => { - mutation.addedNodes.forEach(node => { - if (node.nodeType === Node.ELEMENT_NODE) { - // Check if new tabs were added - if (node.querySelector && node.querySelector('[role="tablist"]')) { - hasNewTabs = true; - } - } - }); - }); - - if (hasNewTabs) { - // Apply current preference to new tabs after a short delay - // to ensure Starlight has finished initializing them - const currentLanguage = 
localStorage.getItem(this.storageKey) || this.defaultLanguage; - setTimeout(() => this.restoreLanguagePreference(currentLanguage), 100); - } - }); - - observer.observe(document.body, { - childList: true, - subtree: true - }); - } - - // Public method to get current preference - getCurrentLanguage() { - return localStorage.getItem(this.storageKey) || this.defaultLanguage; - } - - // Public method to manually set language - setLanguage(language) { - this.storeLanguagePreference(language); - this.restoreLanguagePreference(language); - } -} - -// Initialize the language preference enhancer -const languagePreferenceEnhancer = new LanguagePreferenceEnhancer(); - -// Make it globally available for debugging -window.languagePreferenceEnhancer = languagePreferenceEnhancer; diff --git a/src/scripts/language-preference.ts b/src/scripts/language-preference.ts new file mode 100644 index 00000000..ca8e786d --- /dev/null +++ b/src/scripts/language-preference.ts @@ -0,0 +1,149 @@ +type Language = 'js' | 'go' | 'python'; + +const LANGUAGES: Record = { + js: 'JavaScript', + go: 'Go', + python: 'Python', +}; + +const LANGUAGE_CODES = Object.keys(LANGUAGES) as Language[]; +const DEFAULT_LANGUAGE: Language = 'js'; + +class LanguagePreferenceEnhancer { + private storageKey = 'genkit-preferred-language'; + + constructor() { + if (document.readyState === 'loading') { + document.addEventListener('DOMContentLoaded', () => this.init()); + } else { + this.init(); + } + } + + init() { + const urlParams = new URLSearchParams(window.location.search); + const langFromUrl = urlParams.get('lang') as Language | null; + const storedLanguage = localStorage.getItem(this.storageKey) as Language | null; + + let language: Language; + + if (langFromUrl && LANGUAGE_CODES.includes(langFromUrl)) { + language = langFromUrl; + localStorage.setItem(this.storageKey, language); + } else if (storedLanguage && LANGUAGE_CODES.includes(storedLanguage)) { + language = storedLanguage; + this.updateUrl(language); + } else { + language = DEFAULT_LANGUAGE; + localStorage.setItem(this.storageKey, language); + this.updateUrl(language); + } + + this.setupTabListeners(); + this.restoreLanguagePreference(language); + this.observeContentChanges(); + } + + setupTabListeners() { + document.addEventListener('click', (event) => { + const tabButton = (event.target as HTMLElement).closest('[role="tab"]'); + if (!tabButton) return; + + const tabText = tabButton.textContent?.trim(); + if (tabText && Object.values(LANGUAGES).includes(tabText)) { + const langCode = (Object.keys(LANGUAGES) as Language[]).find((key) => LANGUAGES[key] === tabText); + if (langCode) { + this.storeLanguagePreference(langCode); + } + } + }); + } + + storeLanguagePreference(language: Language) { + if (!LANGUAGE_CODES.includes(language)) { + console.warn(`Unknown language: ${language}`); + return; + } + localStorage.setItem(this.storageKey, language); + this.updateUrl(language); + window.dispatchEvent(new CustomEvent('language-preference-changed', { detail: { language } })); + } + + updateUrl(language: Language) { + const url = new URL(window.location.toString()); + url.searchParams.set('lang', language); + window.history.replaceState({}, '', url); + } + + restoreLanguagePreference(language: Language) { + if (!LANGUAGE_CODES.includes(language)) { + console.warn(`Unknown language: ${language}, using default`); + language = DEFAULT_LANGUAGE; + } + this.activateLanguageTabs(language); + } + + activateLanguageTabs(language: Language) { + const languageTabGroups = 
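    // Starlight renders each tab group as a tablist; scan them all and
    // activate the tab whose label matches the preferred language.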
document.querySelectorAll('[role="tablist"]'); + languageTabGroups.forEach((tabList) => { + const tabs = tabList.querySelectorAll('[role="tab"]'); + let hasLanguageTabs = false; + let targetTab: HTMLElement | null = null; + + tabs.forEach((tab) => { + const tabText = tab.textContent?.trim(); + if (tabText && Object.values(LANGUAGES).includes(tabText)) { + hasLanguageTabs = true; + if (LANGUAGES[language] === tabText) { + targetTab = tab; + } + } + }); + + if (hasLanguageTabs && targetTab && (targetTab as HTMLElement).getAttribute('aria-selected') !== 'true') { + (targetTab as HTMLElement).click(); + } + }); + } + + observeContentChanges() { + const observer = new MutationObserver((mutations) => { + for (const mutation of mutations) { + if (mutation.type === 'childList') { + const currentLanguage = (localStorage.getItem(this.storageKey) as Language) || DEFAULT_LANGUAGE; + this.restoreLanguagePreference(currentLanguage); + break; + } + } + }); + + observer.observe(document.body, { + childList: true, + subtree: true, + }); + } + + getCurrentLanguage(): Language { + const lang = localStorage.getItem(this.storageKey) as Language | null; + return lang && LANGUAGE_CODES.includes(lang) ? lang : DEFAULT_LANGUAGE; + } + + getLanguageName(language: Language): string { + return LANGUAGES[language] || LANGUAGES[DEFAULT_LANGUAGE]; + } + + setLanguage(language: Language) { + this.storeLanguagePreference(language); + this.restoreLanguagePreference(language); + } +} + +export const languagePreferenceEnhancer = new LanguagePreferenceEnhancer(); + +declare global { + interface Window { + languagePreferenceEnhancer: LanguagePreferenceEnhancer; + } +} + +window.languagePreferenceEnhancer = languagePreferenceEnhancer; From c2fa226199f01ccaaf8aace21f6e33524694659c Mon Sep 17 00:00:00 2001 From: Michael Bleigh Date: Fri, 8 Aug 2025 15:34:18 -0700 Subject: [PATCH 9/9] mobile menu --- src/components/LanguagePreferenceSelector.astro | 9 ++++++--- src/content/custom/header.astro | 5 ++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/components/LanguagePreferenceSelector.astro b/src/components/LanguagePreferenceSelector.astro index 83344794..3bd29203 100644 --- a/src/components/LanguagePreferenceSelector.astro +++ b/src/components/LanguagePreferenceSelector.astro @@ -1,15 +1,18 @@ --- import { Icon } from '@astrojs/starlight/components'; +const {variant} = Astro.props +const isSmall = variant === 'small'; ---
-
Language:
+ {!isSmall &&
Language:
}
+
+ +
- + {shouldRenderSearch && }